#include<stdio.h>
#include<stdlib.h>
#include<string.h>

#include"library.h"
#include"parse.h"

/* Return values of the program (exit codes of main):
-1: option error
-2: file error
-3: out of memory */
/* ASCII-only case helpers.
   tolower(c) yields c with bit 0x20 toggled when c lies in 'A'..'Z' and
   c unchanged otherwise; islower(c) is non-zero when c lies in 'a'..'z'.
   Both macros evaluate their argument more than once, so do not pass an
   expression with side effects. */
#define tolower(c)  ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) ^ 0x20) : (c))
#define islower(c)  (((c) >= 'a') && ((c) <= 'z'))

/* Mode and option flags; main() sets them while parsing the command line. */
int checking = 0;            /* set by -c */
int merging = 0;             /* set by -m */
int case_sensitive = 0;      /* set by -C; when 0, words are lowercased before lookup */
int first_checking = 1;      /* cleared by -P; enables the capital-letter check */
int newlibcreate = 1;        /* cleared by -n; enables writing the new word list */
int library_statistics = 0;  /* set by -h; enables write_statistics() */


/* Print the summary of the command line options to stdout. */
void print_help(void);
/* Convert the NUL-terminated STRING to lower case in place. */
void lowercase(unsigned char *string);
/* Write histogram statistics about MAINLIB to the file "libhist". */
void write_statistics( library *mainlib );


/*
 * Build the default output word list name from the text file name:
 * everything from the last '.' of TEXNAME onwards is replaced by ".twl";
 * when TEXNAME contains no '.', ".twl" is appended instead.
 *
 * Returns 1 on success and 0 when the result (including the terminating
 * NUL) does not fit into the DESTSIZE bytes at DEST.
 */
static int default_wordlist_name(unsigned char *dest, size_t destsize,
                                 const unsigned char *texname)
{
  static const char suffix[] = ".twl";
  const char *name = (const char *)texname;
  const char *dot = strrchr(name, '.');
  size_t stemlen = dot ? (size_t)(dot - name) : strlen(name);

  if (destsize < sizeof suffix || stemlen > destsize - sizeof suffix)
    return 0;
  memcpy(dest, name, stemlen);
  memcpy(dest + stemlen, suffix, sizeof suffix);  /* copies the NUL as well */
  return 1;
}

/*
 * Program entry point.
 *
 * Parses the command line, reads every word list named after the first
 * file name into the main library and then either
 *   - merges (-m): reads the first file as a further word list,
 *     reorganizes the library and writes it to the output word list, or
 *   - checks (-c): parses the first file as text, reports every word that
 *     is not found in the main library and, unless -n was given, writes
 *     the unknown words to the output word list.
 *
 * Returns 0 on success, -1 on an option error, -2 on a file error and
 * -3 when a word list could not be created or read (out of memory).
 */
int main(int argc, const unsigned char *argv[])
{
  unsigned char *wort;
  FILE *texfile = NULL, *outfile, *infofile, *newlibfile;
  library *mainlib, *newlib = NULL;
  const unsigned char *newlibname = NULL, *outputname = NULL;
  const unsigned char *texfilename = NULL, *infofilename = NULL;
  unsigned char libnamespace[80], infofilenamespace[] = "TeXortho.inf";
  int i;
  int inputfilefound = 0, optionfound = 0;

  printf("This is TeXortho, version 1.0, by J.Hannappel and E.Werner \n");
  printf("written with GNU C, 1991\n\n");

  if ( argc < 3 )
    {
      print_help();
      return(-1);
    }
  punctuation_check = 1;
  new_sentence = 1;             /* 1.11.91 */
  infofilename = infofilenamespace;
  outfile = stdout;
  mainlib = library_create( 30000 );
  if ( mainlib == NULL )
    {
      printf("could not create the main word list, probably out of memory...\n");
      return(-3);
    }

  /********** parse the command line */
  for ( i = 1; i < argc; i++ )
    {
      if ( argv[i][0] == '-' )          /* this argument is an option */
        {
          const unsigned char option = argv[i][1];

          switch ( option )
            {
            case 'h':       /* write libhist */
              library_statistics = 1;
              break;
            case 'i':       /* name of the info file */
            case 'l':       /* name of the generated word list */
            case 'o':       /* name of the output file */
              /* These options consume the following argument; make sure
                 there is one instead of reading past the end of argv. */
              if ( i + 1 >= argc )
                {
                  printf("option -%c needs a file name! aborting...\n", option);
                  return(-1);
                }
              i++;
              if ( option == 'i' )
                infofilename = argv[i];
              else if ( option == 'l' )
                newlibname = argv[i];
              else
                outputname = argv[i];
              break;
            case 'C':       /* case sensitive search */
              case_sensitive = 1;
              break;
            case 'm':       /* merge file with lib */
              optionfound = 1;
              merging = 1;
              break;
            case 'c':       /* check file for spelling errors */
              checking = 1;
              optionfound = 1;
              break;
            case 'p':       /* suppress check of isolated punctuation chars */
              punctuation_check = 0;
              break;
            case 'n':       /* don't create the word list file */
              newlibcreate = 0;
              break;
            case 'P':       /* suppress check of capital letters at the beginnings of sentences */
              first_checking = 0;
              break;
            default:
              printf("invalid option! aborting...\n" );
              return(-1);
            }
        }
      else if ( inputfilefound )        /* every further file is a word list */
        {
          FILE *libfile = fopen( (const char *)argv[i], "r" );

          if ( !libfile )
            {
              printf("could not open word list %s \n",argv[i]);
              return(-2);
            }
          printf("(%s",argv[i]);
          if ( !library_read( mainlib, libfile ) )
            {
              printf("could not read word list %s \n",argv[i]);
              fclose( libfile );
              return(-3);
            }
          fclose( libfile );
          printf(")");
        }
      else                              /* the first file is the text file */
        {
          texfilename = argv[i];
          inputfilefound = 1;
        }
    }

  if ( !optionfound || !inputfilefound )
    {
      printf("something's missing; aborting...\n");
      return(-1);
    }
  if ( merging && checking )
    {
      printf("-c and -m are incompatible options !\n");
      return(-1);
    }

  /* Without -l the output word list is named after the text file.  The
     name is only derived when it is going to be used. */
  if ( newlibname == NULL && ( merging || newlibcreate ) )
    {
      if ( !default_wordlist_name( libnamespace, sizeof libnamespace, texfilename ) )
        {
          printf("file name %s is too long to derive a word list name; use -l\n",
                 texfilename);
          return(-1);
        }
      newlibname = libnamespace;
    }

  if ( !merging )
    {
      if (!( texfile = fopen( (const char *)texfilename,"r")))
        {
          printf("could not open textfile %s \n",texfilename);
          return(-2);
        }
      printf("(%s",texfilename);
      if ( outputname )
        {
          if (!( outfile = fopen( (const char *)outputname,"w")))
            {
              printf("could not open outputfile %s \n",outputname);
              return(-2);
            }
          printf("(%s",outputname);
        }
      if (!( infofile = fopen( (const char *)infofilename,"r")))
        {
          printf("could not open infofile %s \n",infofilename);
          return(-2);
        }
      printf( "(%s", infofilename);
      init_parser( infofile, texfile, outfile ,texfilename);
      fclose(infofile);
      printf(")");
    }

  if ( newlibcreate && !merging )
    {
      newlib = library_create( 30000 );
      if ( newlib == NULL )
        {
          printf("could not create a new word list, probably out of memory...\n");
          return(-3);
        }
    }

  if ( merging )
    {
      printf("merging %s to %s ...\n", texfilename, newlibname );
      if (!(newlibfile = fopen( (const char *)texfilename, "r")))
        {
          printf("could not open word list file %s \n",texfilename);
          return(-2);
        }
      printf("(%s",texfilename);
      /* Same handling as for the word lists read above: do not write a
         merged list when the input could not be read. */
      if ( !library_read( mainlib, newlibfile ) )
        {
          printf("could not read word list %s \n",texfilename);
          fclose( newlibfile );
          return(-3);
        }
      fclose( newlibfile );
      printf(")");
      if (!(newlibfile = fopen( (const char *)newlibname, "w")))
        {
          printf("could not open otput library %s \n",newlibname);
          return(-2);
        }
      printf("(%s",newlibname);
      printf(" reorganizing...\n");
      library_reorganize(mainlib);
      printf(" writing...\n");
      library_write( mainlib, newlibfile );
      fclose(newlibfile);
      printf(")\n");
      if ( library_statistics ) write_statistics( mainlib );

      library_delete( mainlib );
      return(0);
    }

  /***************************  now on to the end of the TeX file ******************/
  printf("start parsing...\n");

  while (( wort = next_word( &new_sentence )))
    {
      if ( first_checking && new_sentence )
        {
          new_sentence = 0;
          if ( !check_first_char( wort ) )
            fprintf( outfile,"%s:%d: capital letter expected after presumed end of sentence\n",
                     texfilename, zeilennummer );
        }
      if ( !case_sensitive )
        lowercase( wort );
      if ( !library_find_entry( mainlib, wort ) )
        {
          if ( newlibcreate )
            library_enter_entry( newlib, wort, 1 );
          fprintf(outfile, "%s:%d: unknown %s\n", texfilename, zeilennummer, wort);
        }
    }
  if ( library_statistics ) write_statistics( mainlib );
  library_delete( mainlib );
  if (outfile != stdout)
    {
      fclose( outfile );
      printf(")\n");
    }
  fclose( texfile );
  printf(")\n");
  if ( newlibcreate )
    {
      if (!(newlibfile = fopen( (const char *)newlibname, "w")))
        {
          printf("could not open output word list %s \n",newlibname);
          return(-2);
        }
      printf("(%s",newlibname);
      library_write( newlib, newlibfile );
      fclose(newlibfile);
      printf(")\n");
      library_delete( newlib );
    }
  return(0);
}                                   /* end of main */


/* Print the summary of the command line options to stdout. */
void print_help(void)
{
  /* One entry per output line, emitted in order. */
  static const char *const help_lines[] = {
    "possible options:\n",
    " -c\tcheck textfile for errors\n",
    " -m\tmerge word lists\n",
    "exactly one of the above options is required\n",
    " -i\tnext parameter is the info file name; default: TeXortho.inf\n",
    " -l\tnext parameter is the output word list name; default: jobname.twl\n",
    " -o\tnext parameter is the output file name; default: jobname.out\n",
    " -C\tcase sensitive search; be sure your library is made for that!\n",
    " -p\tsupress punctuation check\n",
    " -P\tsupress check for capital letters at the beginnings of sentences\n",
    " -n\tdon't create output word list\n",
    " -h\tdo write a library histogram libhist\n",
    "\nfirst file name is TeX(t) file, the others are word lists\n",
    "for additional help, check the manual\n"
  };
  size_t line;

  for (line = 0; line < sizeof help_lines / sizeof help_lines[0]; line++)
    fputs(help_lines[line], stdout);
}

/*
 * Convert the NUL-terminated STRING to lower case in place.
 * Only the bytes 'A'..'Z' are changed; every other byte is kept as is.
 */
void lowercase( unsigned char *string )
{
  unsigned char *cursor;

  for ( cursor = string; *cursor != '\0'; cursor++ )
    {
      unsigned char ch = *cursor;

      if ( 'A' <= ch && ch <= 'Z' )
        ch ^= 0x20;             /* toggle the ASCII case bit */
      *cursor = ch;
    }
}                                   /* lowercase() */



/*
 * Write histogram statistics about MAINLIB to the file "libhist".
 *
 * The histograms are filled by library_fill_hist().  The first line of the
 * file reports the deepest branch; it is followed by one line per level
 * (capped at the histogram size) holding: the level, tree_hist[level],
 * weight_hist[level], twice the previous level's tree_hist minus this
 * level's, tree_hist[level] / 2^level, and weight_hist[level] /
 * tree_hist[level] (printed as 0 when tree_hist[level] is 0).
 *
 * When the file cannot be opened, a message is printed to stdout and
 * nothing else happens.
 */
void write_statistics( library *mainlib )
{
  enum { HIST_SIZE = 250 };
  long tree_hist[HIST_SIZE];
  long weight_hist[HIST_SIZE];
  long deepest_branch = 0;
  long oldnodes = 0;
  long levels;
  double treespace = 1.0;       /* 2^level; double so it cannot overflow within HIST_SIZE levels */
  int i;
  FILE *histfile = fopen( "libhist", "w" );

  if ( histfile == NULL )
    {
      printf("could not open word list statistics file....\n");
      return;
    }

  printf("writing library statistics on LIBHIST\n");
  for ( i = 0; i < HIST_SIZE; i++ )
    tree_hist[i] = weight_hist[i] = 0;

  library_fill_hist( mainlib,tree_hist,weight_hist,HIST_SIZE - 1,&deepest_branch );

  /* The counters are long, so they must be printed with %ld. */
  fprintf(histfile,"deepest branch is %ld deep!\n",deepest_branch);

  /* Never read past the end of the histogram arrays. */
  levels = deepest_branch < HIST_SIZE - 1 ? deepest_branch + 1 : HIST_SIZE;
  for ( i = 0; i < levels; i++ )
    {
      /* Avoid dividing by zero on a level without entries. */
      float average_weight = tree_hist[i] != 0
        ? (float)weight_hist[i] / (float)tree_hist[i]
        : 0.0f;

      fprintf(histfile,"%2d %10ld %10ld %10ld %10.6f %10.3f\n",
              i, tree_hist[i], weight_hist[i], oldnodes - tree_hist[i],
              (double)tree_hist[i] / treespace,
              average_weight);
      oldnodes = tree_hist[i] * 2;
      treespace *= 2.0;
    }
  fclose(histfile);
}










