/* #module    GloScan    "2-001"
 ***********************************************************************
 *                                                                     *
 * The software was developed at the Monsanto Company and is provided  *
 * "as-is".  Monsanto Company and the author disclaim all warranties   *
 * on the software, including without limitation, all implied warran-  *
 * ties of merchantability and fitness.                                *
 *                                                                     *
 * This software does not contain any technical data or information    *
 * that is proprietary in nature.  It may be copied, modified, and     *
 * distributed on a non-profit basis and with the inclusion of this    *
 * notice.                                                             *
 *                                                                     *
 ***********************************************************************
 */
/*
 * Module Name:	GloScan
 *
 * Author:	R L Aurbach	CR&DS MIS Group    20-Aug-1986
 *
 * Function:
 *	Scan a file for glossary definitions and build the internal data
 *	structure for the data.
 *
 * Modification History:
 *
 * Version     Initials	   Date		Description
 * ------------------------------------------------------------------------
 * 1-001	RLA	20-Aug-1986	Original Code
 * 1-002	RLA	25-Aug-1986	Make Glo_Parse_Item more sophisticated.
 * 2-001	F.H.	17-May-1991	converted to portable C
 */
/*
 * Module GloScan - Module-Wide Data Description Section
 *
 * Include Files:
 */
#ifdef MSDOS
#include <stdlib.h>
#include <io.h>
#define F_OK		0	/* access(): File exists	*/
#else
#include <sys/file.h>
extern char *sprintf();
#endif
#include <string.h>
#include <ctype.h>
#include <malloc.h>
#include <stdio.h>
#include "GloDef.h"
/*
 * Module Definitions:
 */
/*
 * Global Declarations:
 */
#ifdef MSDOS
/* Prototyped declaration, used when MSDOS is defined. */
int	Glo_Scan_File(STRING_PTR file);
#else
/* Old-style declaration (no parameter information) for all other builds. */
int	Glo_Scan_File();
#endif
/*
 * Static Declarations:
 *	Helpers private to this module.  Each one returns TRUE or FALSE.
 *	    glo_copy_text   - append a line of text to a node's hdr list.
 *	    glo_match_label - look a token up in (and remove it from) labels.
 *	    glo_link_node   - insert a node into the root list by spell string.
 *	    glo_parse_item  - split an "@entry{label,item}" line.
 */
#ifdef MSDOS
static int	glo_copy_text(char *line, NODE_PTR node);
static int	glo_match_label(char *token);
static int	glo_link_node(NODE_PTR node);
static int	glo_parse_item(char *line, char *label, char *item, int *ptr);
#else
static int	glo_copy_text();
static int	glo_match_label();
static int	glo_link_node();
static int	glo_parse_item();
#endif
/*
 * External References:
 *	idx_build_spell_string is defined outside this module; here it is
 *	called on a private copy of the item text before the node is linked.
 *	NOTE(review): presumably it rewrites the string in place into a sort
 *	key -- confirm against its definition.
 */
#ifdef MSDOS
extern void idx_build_spell_string(char *desc);
#else
extern void idx_build_spell_string();
#endif
/* Head of the node list that glo_link_node keeps ordered by spell string. */
extern NODE_PTR		    root;
/* Labels still waiting for a definition; glo_match_label removes matches. */
extern STRING_PTR	    labels;
/*
 * Functions Called:
 */
/*
 * Function Glo_Scan_File - Documentation Section
 *
 * Discussion:
 *	Scan the specified glossary file for definitions which have labels which
 *	match the labels in the label list.  If a match is found, copy the
 *	information to a NODE in the data structure and delete the label from
 *	the label list.  Entries are placed in the data structure in alphabetic
 *	order.
 *
 * Calling Synopsis:
 *	status = Glo_Scan_File (file)
 *
 * Inputs:
 *	file	    ->	is a STRING_PTR, passed by value.  It contains the
 *			descriptor for the file specification string to be
 *			used to open the file.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	status	    ->	is a boolean flag which indicates the result of the
 *			operation.  It will have a value of TRUE unless an
 *			error occurs which is fatal to the execution of the
 *			program.
 *
 * Global Data:
 *	root	    ->	additions may be made to the internal data structure.
 *
 *	labels	    ->	if a definition is found, its entry in the label list is
 *			removed.
 *
 * Files Used:
 *	The specified definition file is opened for read access.
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	The operation succeeded.  This means that no error which
 *			is fatal to the operation of the program occurred, not 
 *			that the program proceeded without error.
 *
 * Error Conditions:
 *	status == FALSE	A fatal error occurred.  The reason for the error should
 *			be described in a message to the user
 *
 * Algorithm:
 *	A. If the labels list is empty, return immediately.
 *	B. Convert the file spec to an ASCIZ string and open the file.
 *	C. For each entry in the file,
 *	    1. Locate the label.
 *	    2. If the label list contains that label,
 *		a. Delete the label from the label list.
 *		b. Allocate a NODE.
 *		c. Fill in the NODE with the item and spell string.
 *		d. Fill in the NODE with definition lines.
 *		e. Chain the NODE into the data structure, using the spell 
 *		   string to position it correctly.
 *	D. Close the file.
 *
 * Special Notes:
 *	none
 */
/*
 * Function Glo_Scan_File - Code Section
 */
/*
 * Glo_Scan_File - read "<file->desc>.gdf" and add every "@entry{" definition
 * whose label is still in the labels list to the root list.
 *
 * Returns TRUE unless memory for a new node cannot be allocated, in which
 * case a message is printed and FALSE is returned.  A file spec that is too
 * long or a file that cannot be opened is reported but is not fatal (TRUE).
 * The file is closed on every path that opened it.
 */
int	Glo_Scan_File (file)
STRING_PTR	    file;
{
/*
 * Local Declarations
 */
  char	    dna[256];
  FILE	    *f;
  int	    counter = 0;
  char	    line[linesz];
  char	    token[linesz];
  char	    item[linesz];
  int	    ptr;
  int	    status;
  NODE_PTR  node = 0;
  int	    mode = FALSE;
/*
 * Module Body
 */
  if (labels == 0) return (TRUE);
/*
 * Build the file name.  sizeof(".gdf") counts the terminating NUL, so this
 * test guarantees that the sprintf below cannot overrun dna.
 */
  if (strlen(file->desc) + sizeof(".gdf") > sizeof(dna)) {
    (void)printf ("Glossary definition file name %s is too long\n",
		  file->desc);
    return (TRUE);
  }
  (void)sprintf(dna, "%s.gdf", file->desc);
/* Open the glossary definition file */
  if ((f = fopen(dna, "r")) == NULL) {
    (void)printf ("Could not find the glossary definition file %s\n",
		  file->desc);
    return (TRUE);
  }
  while (fgets(line, linesz, f) != 0) {
/*
 * If mode = FALSE, we are in discard mode.  In this mode, we throw away any
 * line which does not begin with "@entry{".  If mode = TRUE, we are in copy
 * mode.  In this mode, we copy any text to the current node.  In either case,
 * if the line begins with "@entry{", we must process it.
 */
    if (strncmp(line, "@entry{", 7) != 0) {
      if (mode)	(void)glo_copy_text(line, node);
      continue;
    }
/*
 * A new entry always terminates the previous definition.  Stay in discard
 * mode until this entry has been accepted and linked in.
 */
    mode = FALSE;
/* Parse out the label and item; skip the entry if the syntax is bad. */
    status = glo_parse_item(line, token, item, &ptr);
    if (!status) continue;
/* Search the label list for a label with the same name as the token. */
    status = glo_match_label (token);
    if (!status) continue;
/* Allocate and build the NODE structure */
    node = (NODE_PTR) malloc(sizeof(NODE));
    if (node == 0) {
      (void)printf("Could not allocate space for a new definition\n");
      (void)fclose(f);
      return(FALSE);
    }
    node->next = 0;
    node->hdr = 0;
    node->spell = strdup(item);
    node->item = strdup(item);
    if (node->spell == 0 || node->item == 0) {
      (void)printf("Could not allocate space for a new definition\n");
      if (node->spell != 0) (void)free((char *)node->spell);
      if (node->item != 0) (void)free((char *)node->item);
      (void)free((char *)node);
      (void)fclose(f);
      return(FALSE);
    }
    idx_build_spell_string(node->spell);
/* Link the NODE into the data structure in alphabetical order */
    status = glo_link_node(node);
    if (!status) {
      (void)printf("Duplicate glossary entry \'%s\' ignored\n", item);
/* The rejected node was never linked in, so release it here. */
      (void)free((char *)node->spell);
      (void)free((char *)node->item);
      (void)free((char *)node);
      node = 0;
      continue;
    }
/* Count only the definitions that actually made it into the list. */
    counter++;
/* If there is text on the @entry line, use it.	*/
    if (line[ptr] != '\0') (void)glo_copy_text (&line[ptr], node);
    mode = TRUE;
  }
/* End of file seen. */
  (void)fclose(f);
  if (counter == 0) {
    (void)printf("No definitions found in file %s.gdf\n", file->desc);
  }
  else
    (void)printf("%d definitions extracted from file %s.gdf\n",
		 counter, file->desc);
  return (TRUE);
}

/*
 * Function Glo_Link_Node - Documentation Section
 *
 * Discussion:
 *	Link a node into the node list in alphabetical order.
 *
 * Calling Synopsis:
 *	status = Glo_Link_Node (node)
 *
 * Inputs:
 *	node	    ->	is a NODE_PTR, passed by reference.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	status	    ->	is a boolean integer.  TRUE means a successful link.
 *			FALSE means a duplicate entry was found.
 *
 * Global Data:
 *	root	    ->	the node is linked into the root list.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	node was linked in.
 *
 * Error Conditions:
 *	status == FALSE	a node with this spell string was already linked into
 *			the list.  This node is NOT linked in.
 *
 * Algorithm:
 *	A. If the list is empty, link the node in at the root.
 *	B. Else,
 *	    1. Beginning at the first node in the list,
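 *		a. If the list entry's spell string < the new node's,
 *		    1. Get the next list entry.
 *		b. If the two spell strings are equal,
 *		    1. Return FALSE.
 *		c. If the list entry's spell string > the new node's,
 *		    1. Link the new node in ahead of the list entry.
 *	    2. If the list is exhausted, link the new node in at the end.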
 *
 * Special Notes:
 *	none
 */
/*
 * Function Glo_Link_Node - Code Section
 */
/*
 * glo_link_node - insert node into the root list, which is kept in ascending
 * strcmp() order of the spell strings.
 *
 * Returns FALSE, leaving the list untouched, when an entry with the same
 * spell string is already present; otherwise links the node in and returns
 * TRUE.  node->next is only written when the node goes in ahead of an
 * existing entry; at the end of the list it is left as the caller set it.
 */
static int	glo_link_node (node)
NODE_PTR	node;
{
/*
 * Local Declarations
 */
  NODE_PTR		prev = 0;
  NODE_PTR		cur;
  int			cmp;
/*
 * Module Body
 */
/* Walk forward until an entry that does not sort before the new node. */
  for (cur = root; cur != 0; prev = cur, cur = cur->next) {
    cmp = strcmp(cur->spell, node->spell);
    if (cmp == 0) return (FALSE);
    if (cmp > 0) break;
  }
/* cur is the successor (if any); prev is the predecessor (if any). */
  if (cur != 0) node->next = cur;
  if (prev == 0) root = node;
  else prev->next = node;
  return (TRUE);
}

/*
 * Function Glo_Match_Label - Documentation Section
 *
 * Discussion:
 *	See if a label in the label list matches the current file entry.
 *	If it does, then deallocate the label entry and return TRUE.
 *	Otherwise, return FALSE.
 *
 * Calling Synopsis:
 *	status = Glo_Match_Label (token)
 *
 * Inputs:
 *	token	    ->	is the file token string to be matched against the label
 *			list.  It is an ASCIZ string.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	status	    ->	is a boolean integer.  TRUE indicates that this entry
 *			corresponds to a valid label in the label list.  The
 *			label list entry is removed.  FALSE indicates that
 *			this entry does not match a label in the label list.
 *
 * Global Data:
 *	labels	    ->	if the entry matches a label, that STRING in the label
 *			list is removed.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	    We have a match.
 *	status == FALSE	    No match.
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. For each entry in the label list,
 *	    1. Compare the label with the token.
 *	    2. If they match,
 *		a. Unchain and deallocate the STRING.
 *		b. Return TRUE.
 *	    3. Else,
 *		a. Get the next label.
 *	B. If no match,
 *	    1. Return FALSE.
 *
 * Special Notes:
 *	none
 */
/*
 * Function Glo_Match_Label - Code Section
 */
/*
 * glo_match_label - look for token in the labels list.
 *
 * On an exact match the matching STRING is unchained from labels, both its
 * text and the STRING itself are freed, and TRUE is returned.  If no entry
 * matches (or the list is empty) the list is left alone and FALSE is
 * returned.
 */
static int	glo_match_label (token)
char	        *token;
{
/*
 * Local Declarations
 */
  STRING_PTR	    prev;
  STRING_PTR	    cur;
/*
 * Module Body
 */
  prev = labels;
  for (cur = labels; cur != 0; prev = cur, cur = cur->next) {
/* Same length and same characters is exactly what strcmp() == 0 tests. */
    if (strcmp(token, cur->desc) != 0) continue;
/* Found the matching token.  Delete the entry in the labels list. */
    if (cur == labels) labels = cur->next;
    else prev->next = cur->next;
    (void)free((char *)cur->desc);
    (void)free((char *)cur);
    return (TRUE);
  }
  return (FALSE);
}

/*
 * Function Glo_Parse_Item - Documentation Section
 *
 * Discussion:
 *	Parse the label and item strings from a line with the format
 *	    @entry{label,item}[text]
 *	leaving the pointer pointing to the optional text string.
 *
 * Calling Synopsis:
 *	status = Glo_Parse_Item (line, label, item, ptr)
 *
 * Inputs:
 *	line	    ->	is the text line to parse.  It is an ASCIZ text string.
 *
 *
 * Outputs:
 *	label	    ->	is the label text string.  It is an ASCIZ text string.
 *
 *	item	    ->	is the item text string.  It is an ASCIZ text string.
 *
 *	ptr	    ->	is an integer index value, passed by reference.  It is
 *			the array index of the next character of the line
 *			string, after the closing "}".
 *
 * Return Value:
 *	status	    ->	is a result indicator.  If TRUE, the label and item
 *			elements were found.  If FALSE, there was a problem
 *		        with the parse operation.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	Operation succeeded.  All output variables were 
 *			returned.
 *
 * Error Conditions:
 *	status == FALSE	Operation failed due to a missing syntax element.
 *			None of the output variables is trustworthy.
 *
 * Algorithm:
 *	A. Parse out the label.  It begins with the first character after the
 *	   leading "{" and is terminated by a ",".
 *	B. Find the beginning of the item.  It starts at the first 
 *	   non-whitespace character after the ",".
 *	C. Parse out the item.  Internally, braces are allowed as long as they
 *	   properly match.
 *
 * Special Notes:
 *	none
 */
/*
 * Function Glo_Parse_Item - Code Section
 */
/*
 * glo_parse_item - split a line of the form "@entry{label,item}[text]".
 *
 * line   - NUL-terminated input; scanning starts at index 7, i.e. just past
 *	    the 7-character "@entry{" prefix the caller has already matched.
 * label  - receives the text up to the first ',' (or the closing '}').
 * item   - receives the text after the ',' and any following white space,
 *	    up to the matching '}'.  If that is empty, label is copied.
 * ptr    - receives the index in line at which scanning stopped.
 *
 * Returns TRUE on success, FALSE if the line ends before the closing brace.
 * Braces preceded by a backslash do not take part in brace counting.
 * Nothing here bounds the writes to label and item; both are assumed to be
 * at least as large as line -- TODO confirm for every caller.
 */
static int	glo_parse_item (line, label, item, ptr)
char	    *line;
char	    *label;
char	    *item;
int	    *ptr;
{
/*
 * Local Declarations
 */
  int	    i;
  int	    brace;
  char	    x;
/*
 * Module Body
 */
/* Copy the label to the output string.	*/
  i = 0;
  brace = 1;		/* the '{' of "@entry{" is already open */
  *ptr = 7;
  while(TRUE) {
    x = line[(*ptr)++];
    if (x == '\0') return(FALSE);
/* *ptr was just advanced, so line[*ptr - 2] is the character before x. */
    if (x == '{') if (line[*ptr - 2] != '\\') brace++;
    if (x == '}') {
      if (line[*ptr - 2] != '\\') brace--;
      if (brace <= 0) break;
    }
    if (x == ',') break;
    label[i++] = x;
  }
  label[i] = '\0';
/*
 * Find the beginning of the item string.  The cast matters: handing a
 * negative plain char to isspace() is undefined behavior.
 */
  while (isspace((unsigned char)line[*ptr]) != 0) (*ptr)++;
/* Copy the item to the output string. */
  i = 0;
/* brace is already 0 here if the label was ended by '}' rather than ','. */
  while (brace > 0) {
    x = line[(*ptr)++];
    if (x == '\0') return(FALSE);
    if (x == '{') if (line[*ptr - 2] != '\\') brace++;
    if (x == '}') {
      if (line[*ptr - 2] != '\\') brace--;
      if (brace <= 0) break;
    }
    item[i++] = x;
  }
  item[i] = '\0';
/* Check to see if the item is missing. If it is, default to the label. */
  if (i == 0) (void)strcpy(item, label);
  return (TRUE);
}

/*
 * Function Glo_Copy_Text - Documentation Section
 *
 * Discussion:
 *	Create a STRING containing the current line of text and chain it in
 *	to the current node's definition list.
 *
 * Calling Synopsis:
 *	status = Glo_Copy_Text (line, node)
 *
 * Inputs:
 *	line	    ->	is the current line of text.  It is an ASCIZ string.
 *
 *	node	    ->	is the NODE_PTR for the current glossary entry.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	status 	    ->	is a boolean integer which reflects success or
 *			failure.  TRUE will be returned unless there is a
 *			failure to allocate the string.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	    success.
 *
 * Error Conditions:
 *	status == FALSE	    virtual memory allocation failure.
 *
 * Algorithm:
 *	A. Allocate a STRING data structure.
 *	B. If the input string has a non-zero length,
 *	    1. Copy the string to the STRING data structure.
 *	C. Chain the structure in to the end of the hdr structure.
 *
 * Special Notes:
 *	none
 */
/*
 * Function Glo_Copy_Text - Code Section
 */
/*
 * glo_copy_text - append a copy of line to the end of node's hdr list.
 *
 * A line that is empty, or that consists of a single newline, is skipped
 * and TRUE is returned.  Otherwise a new STRING holding a duplicate of the
 * whole line (the trailing newline, if any, is kept) is chained on.
 * Returns FALSE, with nothing linked in and nothing leaked, if either
 * allocation fails.
 */
static int glo_copy_text (line, node)
char *line;
NODE_PTR node;
{
/*
 * Local Declarations
 */
  int		    length;
  STRING_PTR	    tail;
  STRING_PTR	    text;
/*
 * Module Body
 */
  length = strlen(line);
/* Only inspect the last character when there is one. */
  if (length > 0 && line[length-1] == '\n') length--;
  if (length <= 0) return (TRUE);
  text = (STRING_PTR) malloc(sizeof(STRING));
  if (text == 0) return (FALSE);
  text->next = 0;
  text->desc = strdup(line);
  if (text->desc == 0) {
/* Do not chain in a STRING that has no text. */
    (void)free((char *)text);
    return (FALSE);
  }
  if (node->hdr == 0) node->hdr = text;
  else {
/* Walk to the current last element and hang the new one after it. */
    tail = node->hdr;
    while (tail->next != 0) tail = tail->next;
    tail->next = text;
  }
  return (TRUE);
}
