/********************************************************************
**
** Copyright (c) 1989 Mark R. Nelson
**
** LZW data compression/expansion demonstration program.
**
** April 13, 1989
**
** Minor mods made 7/19/2006 to conform with ANSI-C - prototypes, casting, 
** and argument agreement.
**
*********************************************************************/

/*
 *
 * BogdanB addon  - Nov, 2007
 * Adapted to work with a memory stream of bytes (instead of a file specified as a parameter)
 * The stream of bytes mimics a pack of 10 pins embedded into field 126 of the 0210
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BITS 12                   /* Setting the number of bits to 12, 13*/
#define HASHING_SHIFT (BITS-8)    /* or 14 affects several constants.    */
#define MAX_VALUE (1 << BITS) - 1 /* Note that MS-DOS machines need to   */
#define MAX_CODE MAX_VALUE - 1    /* compile their code in large model if*/
                                  /* 14 bits are selected.               */
#if BITS == 14
  #define TABLE_SIZE 18041        /* The string table size needs to be a */
#endif                            /* prime number that is somewhat larger*/
#if BITS == 13                    /* than 2**BITS.                       */
  #define TABLE_SIZE 9029
#endif
#if BITS <= 12
  #define TABLE_SIZE 5021
#endif

/* BogdanB addon */
#define NPAC 10
#define FLD_LIMIT 996
#define EXP_LIMIT 65535
#define LENOF_FLD126_0210 205
#define FLD126_0210 "\x30\x31\x30\x32\x30\
\x12\x34\x56\x78\x01\x23\x45\x67\x12\x11\x20\x08\x09\x87\x65\x43\x21\x09\x87\x65\
\x34\x12\x56\x23\x85\x34\x41\x89\x12\x11\x20\x08\x19\x27\x35\x53\x71\x89\x97\x62\
\x16\x35\x11\x79\x01\x23\x45\x67\x12\x11\x20\x08\x09\x87\x65\x43\x21\x09\x87\x62\
\x12\x24\x22\x38\x11\x23\x45\x17\x12\x11\x20\x08\x09\x87\x65\x43\x21\x09\x87\x63\
\x13\x14\x34\x28\x11\x33\x35\x27\x12\x11\x20\x08\x09\x87\x65\x43\x21\x09\x87\x64\
\x82\x74\x55\x18\x01\x24\x25\x27\x12\x11\x20\x08\x09\x87\x65\x43\x21\x09\x87\x65\
\x52\x39\x57\x79\x21\x23\x15\x37\x12\x11\x20\x08\x09\x87\x65\x43\x21\x39\x87\x66\
\x62\x30\x77\x78\x31\x73\x47\x47\x12\x11\x20\x08\x09\x87\x65\x63\x21\x09\x87\x67\
\x02\x33\x89\x58\x21\x63\x48\x57\x12\x11\x20\x08\x09\x87\x65\x43\x21\x09\x87\x68\
\x32\x32\x01\x68\x01\x53\x49\x69\x12\x11\x20\x08\x19\x84\x65\x43\x21\x09\x87\x69"

void *malloc();

int *code_value; /* This is the code value array */
unsigned int *prefix_code; /* This array holds the prefix codes */
unsigned char *append_character; /* This array holds the appended chars */
unsigned char decode_stack[EXP_LIMIT]; /* This array holds the decoded string */

/*
 * Forward declarations
 */
unsigned char *decode_string(unsigned char *buffer, unsigned int code);
int find_match(int hash_prefix, unsigned int hash_character);
/* BogdanB addon by adapting the originals */
char *mem_compress(char *input, int *nlen);
char *mem_output_code(unsigned int code, int *nlen);
unsigned char *mem_expand(char *input, int *nlen);
unsigned int mem_input_code(char *input, int *nlen);

/* BogdanB addon */
/*
 *
 * Instead of the original file based implementation - we will read the 
 * byte stream defined by the FLD126_0210 into memory - process it there
 * and compress it into a destination char* variable
 *
 * This is to demostrate the compression / decompression with an example
 * of memory based messages usage
 *
 */
main(int argc, char *argv[]){
  int i, nlen;

  /*
   ** The three buffers are needed for the compression phase.
   */
  code_value=(int*)malloc(TABLE_SIZE*sizeof(int));
  prefix_code=(unsigned int *)malloc(TABLE_SIZE*sizeof(unsigned int));
  append_character=(unsigned char *)malloc(TABLE_SIZE*sizeof(unsigned char));
  if(code_value==NULL || prefix_code==NULL || append_character==NULL){
    printf("Fatal error allocating table space!\n");
    exit(-1);
  }

  /* BogdanbB addon - instead of compressing the file - compress the stream of bytes */
  nlen=LENOF_FLD126_0210; char *compressed=mem_compress(FLD126_0210, &nlen);
  printf("Compressed len=%d\n", nlen);
  for(i=0; i<nlen; i++)
    printf("%02x ", (int)(unsigned char)compressed[i]);
  printf("\n");
  free(code_value);

  /* BogdanB addon - decompress a compressed stream of bytes */
  unsigned char *expanded=mem_expand(compressed, &nlen);
  printf("Expanded len=%d\n", nlen);
  for(i=0; i<nlen; i++)
    printf("%02x ", (int)(unsigned char)expanded[i]);
  printf("\n");
  free(expanded);
  free(compressed);

  free(append_character);
  free(prefix_code);
}

/* BogdanB addon by adapting the original */
char *mem_compress(char *input, int *clen){
  char *compressed=malloc(FLD_LIMIT+1);
  char *buff;
  int nC=0, k, kk;
  unsigned int next_code;
  unsigned int character;
  unsigned int string_code;
  unsigned int index;
  int i, nlen, input_len=*clen;

  *clen=0;
  next_code=256;              /* Next code is the next available string code*/
  for (i=0;i<TABLE_SIZE;i++)  /* Clear out the string table before starting */
    code_value[i]=-1;

  i=0;
  printf("Compressing %d bytes", input_len);
  string_code=(int)(unsigned char)input[0];    /* Get the first code                         */
  /*
   * step through the input string and compress
   */
  for(k=1; k<input_len; k++){ /* !!!! starting from the index=1 !!!!, since index=0 is already processed */
    character=(int)(unsigned char)input[k];
    if(++i==NPAC){ /* prints out a pacifier @ every NPAC characters */
      i=0; printf(".");
    }
    index=find_match(string_code,character);/* See if the string is in */
    if (code_value[index] != -1)            /* the table.  If it is,   */
      string_code=code_value[index];        /* get the code value.  If */
    else{                                   /* the string is not in the table, try to add it */
      if (next_code <= MAX_CODE){
        code_value[index]=next_code++;
        prefix_code[index]=string_code;
        append_character[index]=character;
      }
      nlen=0; buff=mem_output_code(string_code, &nlen);
      for(kk=0; kk<nlen; kk++)compressed[nC++]=buff[kk];
      *clen+=nlen;
      free(buff);
      string_code=character;            /* that is not in the table*/
    }                                   /* I output the last string*/
  }                                     /* after adding the new one*/
  /*
   ** End of the main loop.
   */
  nlen=0; buff=mem_output_code(string_code, &nlen); /* output the last code */
  for(kk=0; kk<nlen; kk++)compressed[nC++]=buff[kk];
  *clen+=nlen;
  free(buff);

  nlen=0; buff=mem_output_code(MAX_VALUE, &nlen); /* output the end of buffer code */
  for(kk=0; kk<nlen; kk++)compressed[nC++]=buff[kk];
  *clen+=nlen;
  free(buff);

  nlen=0; buff=mem_output_code(0, &nlen); /* flush the output buffer */
  for(kk=0; kk<nlen; kk++)compressed[nC++]=buff[kk];
  *clen+=nlen;
  free(buff);
  printf(" done\n");

  return compressed;
}

/* BogdanB addon by adapting the original */
char *mem_output_code(unsigned int code, int *nlen){
  char *output=malloc(FLD_LIMIT+1);
  static int output_bit_count=0;
  static unsigned long output_bit_buffer=0L;
  int i=0;

  output_bit_buffer |= (unsigned long) code << (32-BITS-output_bit_count);
  output_bit_count += BITS;
  while(output_bit_count >= 8){
    output[i++]=output_bit_buffer >> 24;
    output[i]='\0';

    output_bit_buffer <<= 8;
    output_bit_count -= 8;
  }
  *nlen=i;

  return output;
}

/* BogdanB addon by adapting the original */
unsigned char *mem_expand(char *input, int *elen){
  unsigned char *expanded=malloc(EXP_LIMIT+1);
  int nE=0, k, nlen;
  unsigned int next_code;
  unsigned int new_code;
  unsigned int old_code;
  int character;
  int counter;
  unsigned char *string;
  int input_len=*elen;

  *elen=0;
  next_code=256;           /* This is the next available code to define */
  counter=0;               /* Counter is used as a pacifier            */
  printf("Expanding %d bytes", input_len);

  nlen=0; old_code=mem_input_code(input, &nlen); for(k=0; k<nlen; k++)*input++;
  character=old_code;          /* initialize the character variable */
  expanded[nE++]=old_code;

  nlen=0;
  /*
   **  This is the main expansion loop.  It processes characters from the LZW stream of bytes
   **  until it sees the special code used to indicate the end of the data.
   */
  while((new_code=mem_input_code(input, &nlen)) != (MAX_VALUE)){
    for(k=0; k<nlen; k++)*input++; /* inc the pointer the number of times it's been processed by function call */
    nlen=0;
    if(++counter==NPAC){ /* prints out a pacifier @ every NPAC characters */
      counter=0; printf(".");
    }
    /*
     ** This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING
     ** case which generates an undefined code.  It handles it by decoding
     ** the last code, and adding a single character to the end of the decode string
     */
    if(new_code>=next_code){
      *decode_stack=character;
      string=decode_string(decode_stack+1, old_code);
    }
    /*
     ** Otherwise we do a straight decode of the new code.
     */
    else
      string=decode_string(decode_stack, new_code);
    /*
     ** Now we write the decoded string in reverse order
     */
    character=*string;
    while(string >= decode_stack)
      expanded[nE++]=*string--;
    /*
     ** Finally, if possible, add a new code to the string table
     */
    if(next_code <= MAX_CODE){
      prefix_code[next_code]=old_code;
      append_character[next_code]=character;
      next_code++;
    }
    old_code=new_code;
  }
  printf(" done\n");
  *elen=nE;

  expanded[nE]='\0';
  return expanded;
}

/*
** This routine simply decodes a string from the string table, storing
** it in a buffer.  The buffer can then be output in reverse order by
** the expansion program
*/
unsigned char *decode_string(unsigned char *buffer, unsigned int code){
  int i=0;

  while(code>255){
    *buffer++ = append_character[code];
    code=prefix_code[code];
    if(i++>=MAX_CODE){
      printf("Fatal error during code expansion!\n");
      exit(-2);
    }
  }
  *buffer=code;

  return buffer;
}

/* BogdanB addon by adapting the original */
unsigned int mem_input_code(char *input, int *nlen){
  unsigned int return_value;
  static int input_bit_count=0;
  static unsigned long input_bit_buffer=0L;
  int i=0;

  while(input_bit_count <= 24){ 
    input_bit_buffer |= 
      (unsigned char) input[i++] << (24-input_bit_count);
    input_bit_count += 8;
  }
  *nlen=i;

  return_value=input_bit_buffer >> (32-BITS);
  input_bit_buffer <<= BITS;
  input_bit_count -= BITS;

  return return_value;
}

/*
** This is the hashing routine.  It tries to find a match for the prefix+char
** string in the string table.  If it finds it, the index is returned.  If
** the string is not found, the first available index in the string table is
** returned instead.
*/
int find_match(int hash_prefix, unsigned int hash_character){
  int index;
  int offset;

  index = (hash_character << HASHING_SHIFT) ^ hash_prefix;
  if(index == 0)
    offset = 1;
  else
    offset = TABLE_SIZE - index;
  while(1){
    if(code_value[index] == -1)
      return(index);
    if(prefix_code[index] == hash_prefix && 
        append_character[index] == hash_character)
      return(index);
    index -= offset;
    if(index < 0)
      index += TABLE_SIZE;
  }
}
