/*
    Tango/Weevil - A WEB Tangler and Weaver
    Copyright (C) 1995 Corey Minyard

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    Corey Minyard - minyard@metronet.com
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tango.h"

/* Values stored in t_codeinfo_c.code_state by c_scan_input():
     GLOBAL       - outside of any procedure
     PROC_DEFN    - inside the parenthesized list that follows a name
                    at global level
     INPROC       - after that list (or in a procedure body) where
                    names are treated as uses
     PROC_CAN_DEF - just after a '{' inside a procedure, where names
                    may still be local definitions */
#define GLOBAL			1
#define PROC_DEFN		2
#define INPROC			3
#define PROC_CAN_DEF		4

/* NOTE(review): these three are not referenced anywhere in this file's
   visible code, and DEFINE/VARDEF share numeric values with
   INPROC/PROC_CAN_DEF above - confirm whether they are still needed. */
#define DEFINE			3
#define VARDEF			4
#define FOUND_OPER		5

/* Values stored in t_codeinfo_c.last_token: the kind of the most
   recently scanned token. */
#define OPER			6
#define NAME			7
#define NONE			8
#define PERIOD			9
#define RESERVED_WORD		10
#define SEMI			11
#define NUMSIGN			12
#define STRUCT			13

/* Scanner state for the C language.  One of these is allocated by
   init_c_lang() and hung off lptd->code_info; c_scan_input() reads and
   updates it on every call so that state carries from one chunk of
   input to the next. */
typedef struct
{
   int code_state;          /* GLOBAL, PROC_DEFN, INPROC or PROC_CAN_DEF */
   int last_token;          /* Kind of the last token seen (OPER, NAME, ...) */
   int nest_level;          /* Incremented on '{', decremented on '}' */
   int sqbrace_nest;        /* Incremented on '[', decremented on ']' */
   int paren_level;         /* Parenthesis depth, reset to 1 when a
                               PROC_DEFN parameter list is entered */
   bool name_seen;          /* A name use has been seen in the current
                               statement; later names may be defines */
   bool is_static;          /* "static" was seen; cleared at ';' */
   bool in_define;          /* Inside a '#' directive */
   bool in_number;          /* Inside a numeric constant */
   bool define_name_found;  /* The name after "#define" has been recorded
                               (also set for directives other than define) */
   bool lastescape;         /* Previous string character was an unescaped
                               backslash */
   char quotechar;          /* Quote character that opened the current
                               string or character constant */
   bool in_struct;          /* Set on "struct"/"union", cleared at the
                               matching ';' or '}' */
   bool in_enum;            /* Set on "enum", cleared at '}' */
   bool in_enum_define;     /* Set at the '{' of an enum whose tag was
                               turned into a define; cleared at '}' */
   bool in_extern_c;        /* Inside an extern "C" region */
   int  struct_nest;        /* nest_level at the time in_struct was set */
   bool comment_after_star; /* Previous comment character was '*' */
   t_namelist *local_vars;  /* Names defined while in PROC_DEFN or
                               PROC_CAN_DEF; pruned by destroy_locals() */
   t_namelist *last_name;   /* Most recently created name item */
   int rword;               /* Result of is_reserved_word() for the most
                               recent name (-1 if not reserved) */
   int extern_c_nest;       /* nest_level at the time in_extern_c was set */
} t_codeinfo_c;

/* Codes returned by is_reserved_word(), one per C reserved word that
   the scanner recognizes. */
#define WHILE_RW	1
#define DO_RW		2
#define FOR_RW		3
#define IF_RW		4
#define SWITCH_RW	5
#define CASE_RW		6
#define DEFAULT_RW	7
#define GOTO_RW		8
#define CONTINUE_RW	9
#define BREAK_RW	10
#define RETURN_RW	11
#define AUTO_RW		12
#define REGISTER_RW	13
#define STATIC_RW	14
#define EXTERN_RW	15
#define TYPEDEF_RW	16
#define CONST_RW	17
#define VOLATILE_RW	18
#define STRUCT_RW	19
#define UNION_RW	20
#define SIZEOF_RW	21
#define ELSE_RW		22
#define ENUM_RW		23

/* Build one table entry; the length is taken from the literal itself
   (sizeof includes the terminating nul, hence the -1). */
#define RW_ENTRY(word, code)	{ word, code, (int) (sizeof(word) - 1) }

/* Table of reserved words: spelling, code and spelling length. */
static struct rword_s
{
   char *name;
   int  val;
   int  length;
} rwords[] =
{
   RW_ENTRY("while",    WHILE_RW),
   RW_ENTRY("do",       DO_RW),
   RW_ENTRY("for",      FOR_RW),
   RW_ENTRY("if",       IF_RW),
   RW_ENTRY("switch",   SWITCH_RW),
   RW_ENTRY("case",     CASE_RW),
   RW_ENTRY("default",  DEFAULT_RW),
   RW_ENTRY("goto",     GOTO_RW),
   RW_ENTRY("continue", CONTINUE_RW),
   RW_ENTRY("break",    BREAK_RW),
   RW_ENTRY("return",   RETURN_RW),
   RW_ENTRY("auto",     AUTO_RW),
   RW_ENTRY("register", REGISTER_RW),
   RW_ENTRY("static",   STATIC_RW),
   RW_ENTRY("extern",   EXTERN_RW),
   RW_ENTRY("typedef",  TYPEDEF_RW),
   RW_ENTRY("const",    CONST_RW),
   RW_ENTRY("volatile", VOLATILE_RW),
   RW_ENTRY("struct",   STRUCT_RW),
   RW_ENTRY("union",    UNION_RW),
   RW_ENTRY("sizeof",   SIZEOF_RW),
   RW_ENTRY("else",     ELSE_RW),
   RW_ENTRY("enum",     ENUM_RW)
};
#undef RW_ENTRY

/* Number of entries in rwords[]. */
static const int num_rw = (int) (sizeof(rwords) / sizeof(rwords[0]));

/* Look up the token made of the first `length' characters of `name'
   (which need not be nul terminated at that point) in the reserved
   word table.  Returns the matching *_RW code, or -1 if the token is
   not a reserved word. */
static int
is_reserved_word(char *name,
		 int  length)
{
   int idx = 0;

   while (idx < num_rw)
   {
      /* Compare lengths first so a reserved word that is only a prefix
         of the token (or vice versa) does not match. */
      if (   (rwords[idx].length == length)
          && (strncmp(rwords[idx].name, name, length) == 0))
      {
	 return(rwords[idx].val);
      }
      idx++;
   }

   return(-1);
}

/* NULL-terminated list of built-in type names.  add_name_use() does not
   record uses of these names. */
static char *(intrinsic_types[]) =
{
   "int", "void", "char", "long", "unsigned", "float", "double",
   "short", NULL
};

/* Return true if the nul-terminated string `name' exactly matches one
   of the entries in intrinsic_types[], false otherwise. */
static bool
is_intrinsic_type(char *name)
{
   char **entry = intrinsic_types;

   /* Walk the table until we hit a match or the NULL terminator. */
   while (   (*entry != NULL)
          && (strcmp(name, *entry) != 0))
   {
      entry++;
   }

   /* Stopping on the terminator means nothing matched. */
   return(*entry != NULL);
}

/* Record `item' as a definition, choosing the list from the scanner
   state in `ci':
     - automatic cross-referencing off:  the item is freed
     - inside a '#' directive:           curr_macro->pounddefs
     - inside a struct/union:            the item is freed
     - GLOBAL state:                     curr_macro->staticdefs if
                                         "static" was seen, otherwise
                                         curr_macro->globaldefs
     - PROC_DEFN / PROC_CAN_DEF state:   ci->local_vars, tagged with the
                                         current nesting level
     - any other state:                  the item is freed
   In every case the caller hands `item' over to this routine. */
static void
add_name_define(t_lptangodat *lptd,
		t_namelist   *item,
		t_codeinfo_c *ci)
{
   if (! (lptd->auto_xref))
   {
      free_namelist_item(lptd, item);
      return;
   }

   if (ci->in_define)
   {
      list_insert_unique(lptd, &(lptd->curr_macro->pounddefs), item);
      return;
   }

   if (ci->in_struct)
   {
      /* Structure members are not tracked. */
      free_namelist_item(lptd, item);
      return;
   }

   if (ci->code_state == GLOBAL)
   {
      if (ci->is_static)
      {
	 list_insert_unique(lptd, &(lptd->curr_macro->staticdefs), item);
      }
      else
      {
	 list_insert_unique(lptd, &(lptd->curr_macro->globaldefs), item);
      }
      return;
   }

   if (   (ci->code_state == PROC_DEFN)
       || (ci->code_state == PROC_CAN_DEF))
   {
      list_insert_unique(lptd, &(ci->local_vars), item);
      /* NOTE(review): item is written after the insert; this assumes
	 list_insert_unique() never frees the item it is given -
	 confirm against its implementation. */
      item->u.nest_val = ci->nest_level;
      return;
   }

   /* Any other state: nothing to record. */
   free_namelist_item(lptd, item);
}

/* Record `item' as a use in curr_macro->uses.  The item is freed
   instead when automatic cross-referencing is off, when the name is
   found in the list of local variables, or when it is an intrinsic
   type name. */
static void
add_name_use(t_lptangodat *lptd,
	     t_namelist   *item,
	     t_codeinfo_c *ci)
{
   /* The tests are evaluated left to right and stop at the first one
      that holds, so the list search is only done when cross-referencing
      is enabled. */
   if (   (! (lptd->auto_xref))
       || (find_name_in_list(lptd, ci->local_vars, item) != NULL)
       || (is_intrinsic_type(item->name)))
   {
      free_namelist_item(lptd, item);
      return;
   }

   list_insert_unique(lptd, &(lptd->curr_macro->uses), item);
}

/* Pop and free entries from the front of ci->local_vars for as long as
   the front entry's nest_val is at or above the current nesting
   level. */
static void
destroy_locals(t_lptangodat *lptd,
	       t_codeinfo_c *ci)
{
   t_namelist *victim;

   for (;;)
   {
      victim = ci->local_vars;
      if (   (victim == NULL)
          || (victim->u.nest_val < ci->nest_level))
      {
	 break;
      }

      /* Unlink before freeing so the list head stays valid. */
      ci->local_vars = victim->next;
      free_namelist_item(lptd, victim);
   }
}

/* Scan `length' characters of C source starting at `line', picking out
   names and recording them as definitions (add_name_define) or uses
   (add_name_use).  All scanner state lives in lptd->code_info,
   lptd->instring and lptd->in_comment, so strings, comments and
   nesting carry over from one call to the next.  A name is only
   recognized when the character that terminates it is seen within the
   same call.

   lptd   - tangle data; supplies the scanner state and the current
            macro's name lists
   line   - text to scan (need not be nul terminated)
   length - number of characters of `line' to scan
   lineno - not used by this scanner */
void
c_scan_input(t_lptangodat *lptd,
	     char         *line,
	     int          length,
	     int          lineno)
{
   char *cp;
   char *start_tok;
   int count;
   t_namelist *defname;
   t_codeinfo_c *ci;

   (void) lineno;

   ci = lptd->code_info;
   count = 0;
   cp = line;
   start_tok = NULL;
   while (count < length)
   {
      if (lptd->instring)
      {
         /* Skip forward to the closing quote or the end of the input. */
         while (TRUE)
         {
            /* Check the bound before looking at *cp so that we never
               read past the end of the buffer. */
            if (count >= length)
            {
               break;
            }

            if (   (*cp == ci->quotechar)
                && (! ci->lastescape))
            {
               lptd->instring = FALSE;
               break;
            }

            if (   (*cp == '\\')
                && (! ci->lastescape))
            {
               ci->lastescape = TRUE;
            }
            else
            {
               ci->lastescape = FALSE;
            }
            cp++;
            count++;
         }

         /* A "C" string right after the extern keyword starts an
            extern "C" region. */
         if (   (ci->last_token == RESERVED_WORD)
             && (ci->rword == EXTERN_RW)
             && (count > 2)
             && ((*(cp-1) == 'C') && (*(cp-2) == '"')))
         {
            ci->in_extern_c = TRUE;
            ci->extern_c_nest = ci->nest_level;
         }
         ci->last_token = NAME;
      }
      else if (lptd->in_comment)
      {
         /* Skip forward to the closing star-slash or the end of the
            input. */
         while (TRUE)
         {
            if (count >= length)
            {
               break;
            }
            else
            {
               if (   (ci->comment_after_star)
                   && (*cp == '/'))
               {
                  lptd->in_comment = FALSE;
                  break;
               }

               if (*cp == '*')
               {
                  ci->comment_after_star = TRUE;
               }
               else
               {
                  ci->comment_after_star = FALSE;
               }
            }

            cp++;
            count++;
         }
      }
      else if (   ((*cp >= 'a') && (*cp <= 'z'))
               || ((*cp >= 'A') && (*cp <= 'Z'))
               || ((start_tok != NULL) && ((*cp >= '0') && (*cp <= '9')))
               || (*cp == '_'))
      {
         /* Part of a name.  Letters inside a number (suffixes, hex
            digits, exponents) do not start a name. */
         if ((start_tok == NULL) && (!ci->in_number))
         {
            start_tok = cp;
         }
      }
      else if (   ((*cp >= '0') && (*cp <= '9'))
               || (   ci->in_number
                   && (   (*cp == '.')
                       || (   ((*cp == '+') || (*cp == '-'))
                           && (count > 0)
                           && (   (*(cp-1) == 'e')
                               || (*(cp-1) == 'E'))))))
      {
         /* Part of a number.  A sign only belongs to the number when it
            directly follows an exponent letter; otherwise it is an
            operator and the name after it (as in "5-x") must still be
            seen. */
         ci->in_number = TRUE;
      }
      else
      {
         ci->in_number = FALSE;

         if (ci->in_define)
         {
            if (   (*cp == '/')
                && ((count+1) < length)
                && (*(cp+1) == '*'))
            {
               cp++;
               count++;
               lptd->in_comment = TRUE;
               /* Start with a clean flag so that the slash in a
                  slash-star-slash sequence cannot close the comment. */
               ci->comment_after_star = FALSE;
            }
            else
            {
               if (ci->last_token == NUMSIGN)
               {
                  ci->last_token = NONE;

                  /* got a # directive.  We are only really
                     interested in defines */
                  if (   (start_tok != NULL)
                      && ((cp - start_tok) == 6)
                      && (strncmp(start_tok, "define", 6) == 0))
                  {
                     ci->define_name_found = FALSE;
                  }
                  else
                  {
                     ci->define_name_found = TRUE;
                  }
               }
               else if (   (start_tok != NULL)
                        && (! ci->define_name_found))
               {
                  /* First name after "#define" is the macro name. */
                  defname = create_namelist_item(lptd,
                                                 start_tok,
                                                 cp - start_tok);
                  add_name_define(lptd, defname, ci);
                  ci->define_name_found = TRUE;
               }

               /* Look at the terminating character itself even when it
                  also ended a token above; otherwise a directive such
                  as "#endif" or "#define NAME" directly followed by the
                  newline would never leave directive mode. */
               if (   (*cp == '\\')
                   && ((count+1) < length))
               {
                  /* Skip the escaped character (usually a continued
                     line's newline). */
                  cp++;
                  count++;
               }
               else if (*cp == '\n')
               {
                  ci->in_define = FALSE;
               }
            }
            start_tok = NULL;
         }
         else
         {
            if (   (start_tok != NULL) /* We just finished with a name */
                && (ci->last_token != PERIOD)) /* Ignore struct elements */
            {
               ci->rword = is_reserved_word(start_tok, cp - start_tok);
               if (ci->rword != -1)
               {
                  ci->last_token = RESERVED_WORD;

                  /* If static, save that for later. */
                  if (ci->rword == STATIC_RW)
                  {
                     ci->is_static = TRUE;
                  }
                  if (   (   (ci->rword == STRUCT_RW)
                          || (ci->rword == UNION_RW))
                      && (! ci->in_struct))
                  {
                     ci->in_struct = TRUE;
                     ci->struct_nest = ci->nest_level;
                     ci->last_token = STRUCT;
                  }
                  else if (ci->rword == ENUM_RW)
                  {
                     ci->in_enum = TRUE;
                  }
                  /* no other reserved words really matter. */
               }

               /* The first name in a line is a name use, the rest
                  must be definitions unless they are in {}.  Also,
                  a name right after struct or union is a define */
               else
               {
                  if (ci->code_state == GLOBAL)
                  {
                     /* The nesting level must be 0 or at the same
                        level as an extern "C", a name must have been
                        seen, and we must not be in square braces. */
                     if (   (   (ci->nest_level == 0)
                             || (   (ci->in_extern_c)
                                 && (ci->nest_level <= (ci->extern_c_nest+1)))
                             || (ci->in_struct))
                         && (ci->name_seen)
                         && (ci->sqbrace_nest == 0))
                     {
                        /* This is a variable or type declaration. */
                        ci->last_name = create_namelist_item(lptd,
                                                             start_tok,
                                                             cp - start_tok);
                        add_name_define(lptd, ci->last_name, ci);
                     }
                     /* Everything in an enum is a definition */
                     else if (ci->in_enum_define)
                     {
                        /* This is an enumeration constant. */
                        ci->last_name = create_namelist_item(lptd,
                                                             start_tok,
                                                             cp - start_tok);
                        add_name_define(lptd, ci->last_name, ci);
                     }
                     else
                     {
                        /* This is a variable or type use. */

                        ci->name_seen = TRUE;
                        ci->last_name = create_namelist_item(lptd,
                                                             start_tok,
                                                             cp - start_tok);
                        add_name_use(lptd, ci->last_name, ci);
                     }
                  }
                  else
                  {
                     /* Handle a name in a procedure or parameter list. */
                     if (   (   (ci->code_state == PROC_CAN_DEF)
                             || (ci->code_state == PROC_DEFN))
                         && (ci->name_seen)
                         && (ci->sqbrace_nest == 0))
                     {
                        /* This is a variable or type declaration. */
                        ci->last_name = create_namelist_item(lptd,
                                                             start_tok,
                                                             cp - start_tok);
                        add_name_define(lptd, ci->last_name, ci);
                     }
                     else
                     {
                        /* This is a variable or type use. */

                        ci->name_seen = TRUE;
                        ci->last_name = create_namelist_item(lptd,
                                                             start_tok,
                                                             cp - start_tok);
                        add_name_use(lptd, ci->last_name, ci);
                     }
                  }
                  ci->last_token = NAME;
               }
            }
            start_tok = NULL;

            /* Now handle the character that ended the token (or that
               stands on its own). */
            switch(*cp)
            {
             case ' ': case '\t':
               break;

             case ';':
               if (   (   (ci->nest_level == 0)
                       || (   (ci->in_extern_c)
                           && (ci->nest_level <= (ci->extern_c_nest+1))))
                   && (ci->code_state == INPROC))
               {
                  /* A ';' after a parameter list at the outer level:
                     it was a prototype, not a procedure body. */
                  destroy_locals(lptd, ci);
                  ci->code_state = GLOBAL;
               }
               if (   (ci->in_struct)
                   && (ci->nest_level <= ci->struct_nest))
               {
                  ci->in_struct = FALSE;
               }

               ci->last_name = NULL;
               ci->is_static = FALSE;
               ci->name_seen = FALSE;

               ci->last_token = SEMI;
               break;

             case ',':
               if (ci->code_state == PROC_DEFN)
               {
                  /* In a list of procedure parameters, make commas
                     look like semicolons. */
                  ci->last_name = NULL;
                  ci->last_token = SEMI;
                  ci->is_static = FALSE;
                  ci->name_seen = FALSE;
               }
               /* Otherwise we are probably passing parameters to a
                  procedure call or listing declarators; nothing to
                  do. */
               break;

             case ')':
               ci->paren_level--;
               if (  (ci->paren_level == 0)
                   && (ci->code_state == PROC_DEFN))
               {
                  ci->code_state = INPROC;
               }
               break;

             case '=':
               if (ci->code_state == PROC_CAN_DEF)
               {
                  /* We've seen an assign, we are no longer
                     in definition mode. */
                  ci->code_state = INPROC;

                  if (   (ci->last_name != NULL)
                      && (ci->last_name == ci->local_vars))
                  {
                     /*  I thought the last name was a define,
                         it is actually a use. */
                     ci->local_vars = ci->last_name->next;

                     add_name_use(lptd, ci->last_name, ci);
                  }
               }

               ci->last_token = OPER;
               break;

             case '(':
               if (   (ci->code_state == GLOBAL)
                   && (ci->last_token == NAME))
               {
                  /* name( at global level starts a parameter list. */
                  ci->code_state = PROC_DEFN;
                  ci->paren_level = 1;
                  ci->last_name = NULL;
                  ci->name_seen = FALSE;
               }
               else
               {
                  ci->paren_level++;
                  if (   (ci->last_token == NAME)
                      && (ci->code_state == PROC_CAN_DEF)
                      && (ci->last_name != NULL)
                      && (ci->last_name == ci->local_vars))
                  {
                     /*  I thought the last name was a define,
                         it is actually a use. */
                     ci->local_vars = ci->last_name->next;

                     add_name_use(lptd, ci->last_name, ci);
                  }
               }
               break;

             case '.':
               ci->last_token = PERIOD;
               break;

             case '-':
               if (   ((count+1) < length)
                   && (*(cp+1) == '>'))
               {
                  cp++;
                  count++;
                  /* Even though '->' is not a period, it works
                     about the same for our purposes. */
                  ci->last_token = PERIOD;
               }
               else
               {
                  ci->last_token = OPER;
               }
               break;

             case '{':
               if (ci->code_state == INPROC)
               {
                  ci->code_state = PROC_CAN_DEF; /* Can define local
                                                    vars now. */
               }
               else if (   (ci->code_state == GLOBAL)
                        && (ci->last_name != NULL)
                        && (lptd->curr_macro->uses != NULL)
                        && (lptd->curr_macro->uses == ci->last_name)
                        && (   (   (ci->in_struct)
                                && (ci->struct_nest == ci->nest_level))
                            || (ci->in_enum)))
               {
                  /* I thought the last item was a use, it was actually
                     a structure or enum define. */
                  lptd->curr_macro->uses = ci->last_name->next;

                  if (ci->in_struct)
                  {
                     /* add_name_define() discards names while in_struct
                        is set, so clear it around the call. */
                     ci->in_struct = FALSE;
                     add_name_define(lptd, ci->last_name, ci);
                     ci->in_struct = TRUE;
                  }
                  else
                  {
                     ci->in_enum_define = TRUE;
                     add_name_define(lptd, ci->last_name, ci);
                  }
               }
               ci->nest_level++;
               ci->last_name = NULL;
               ci->name_seen = FALSE;

               ci->last_token = NONE;
               break;

             case '}':
               /* Make sure to get rid of anything declared in this block */
               destroy_locals(lptd, ci);
               ci->nest_level--;

               ci->in_enum = FALSE; /* an rbrace will always finish an enum */
               ci->in_enum_define = FALSE;

               /* check to see if this is the end of an extern "C" { } */
               if ((ci->in_extern_c) && (ci->extern_c_nest == ci->nest_level))
               {
                  ci->in_extern_c = FALSE;
               }

               /* An end brace can be like a name, such as at the end of
                  a structure definition. */
               ci->last_token = NAME;
               ci->name_seen = TRUE;

               if (   (ci->in_struct)
                   && (ci->struct_nest >= ci->nest_level))
               {
                  ci->in_struct = FALSE;
               }

               if (  (   (ci->nest_level == 0)
                      || (   (ci->in_extern_c)
                          && (ci->nest_level <= (ci->extern_c_nest+1))))
                   && (   (ci->code_state == INPROC)
                       || (ci->code_state == PROC_CAN_DEF)))
               {
                  /* End of a procedure body. */
                  ci->code_state = GLOBAL;
                  ci->name_seen = FALSE;
                  ci->last_token = NONE;
               }
               else if (ci->code_state == PROC_CAN_DEF)
               {
                  ci->code_state = INPROC;
               }
               break;

             case '"':
             case '\'':
               /* Start of a string or character constant; the body is
                  consumed by the instring branch above. */
               ci->quotechar = *cp;
               lptd->instring = TRUE;
               ci->lastescape = FALSE;
               break;

             case '[':
               (ci->sqbrace_nest)++;
               break;

             case ']':
               (ci->sqbrace_nest)--;
               break;

             case '#':
               ci->in_define = TRUE;
               ci->last_token = NUMSIGN;
               break;

             case '/':
               if (   ((count+1) < length)
                   && (*(cp+1) == '*'))
               {
                  cp++;
                  count++;
                  lptd->in_comment = TRUE;
                  ci->comment_after_star = FALSE;
               }
               else
               {
                  ci->last_token = OPER;
               }
               break;

             default:
               ci->last_token = OPER;
               break;
            }
         }
      }

      cp++;
      count++;
   }
}

/* Write a C "#line" directive naming `lineno' and `filename' to the
   tangle output file (lptd->outfile).  The file name is written as
   given, with no escaping. */
void
c_output_linenum(t_lptangodat *lptd,
                 int          lineno,
                 char         *filename)
{
   FILE *out = lptd->outfile;

   fprintf(out, "#line %d \"%s\"\n", lineno, filename);
}

/* Allocate the C scanner state, attach it to lptd->code_info and set
   every field to its start-of-file value.  Prints a message and exits
   with status 1 if the allocation fails. */
void
init_c_lang(t_lptangodat *lptd)
{
   t_codeinfo_c *ci;

   ci = malloc(sizeof(*ci));
   lptd->code_info = ci;
   if (ci == NULL)
   {
      fprintf(stderr, "Unable to allocate enough memory\n");
      exit(1);
   }

   /* malloc() does not clear memory, so every field is set here. */
   ci->code_state = GLOBAL;
   ci->last_token = NONE;
   ci->nest_level = 0;
   ci->sqbrace_nest = 0;
   ci->paren_level = 0;
   ci->name_seen = FALSE;
   ci->is_static = FALSE;
   ci->in_define = FALSE;
   ci->in_number = FALSE;
   ci->define_name_found = FALSE;
   ci->lastescape = FALSE;
   ci->comment_after_star = FALSE;
   ci->quotechar = '\0';
   ci->local_vars = NULL;
   ci->last_name = NULL;
   ci->in_struct = FALSE;
   ci->in_enum = FALSE;
   ci->in_enum_define = FALSE;
   ci->in_extern_c = FALSE;
   ci->struct_nest = 0;
   ci->rword = -1;           /* same value is_reserved_word() uses for
                                "not a reserved word" */
   ci->extern_c_nest = 0;
}
