/* #module    IdxBuild    "3-001"
 ***********************************************************************
 *                                                                     *
 * The software was developed at the Monsanto Company and is provided  *
 * "as-is".  Monsanto Company and the auther disclaim all warranties   *
 * on the software, including without limitation, all implied warran-  *
 * ties of merchantabilitiy and fitness.                               *
 *                                                                     *
 * This software does not contain any technical data or information    *
 * that is proprietary in nature.  It may be copied, modified, and     *
 * distributed on a non-profit basis and with the inclusion of this    *
 * notice.                                                             *
 *                                                                     *
 ***********************************************************************
 */
/*
 * Module Name:	IdxBuild
 *
 * Author:	R L Aurbach	CR&DS MIS Group    27-Apr-1986
 *
 * Function:
 *	Build the Index Tree data structure
 *
 * Modification History:
 *
 * Version     Initials	   Date		Description
 * ------------------------------------------------------------------------
 * 1-001	RLA	27-Apr-1986	Original Code
 * 1-002	RLA	03-May-1986	Add support for page-no highlighting
 *					  and move spell-string processing to a
 *					  separate module.
 * 2-003	RLA	09-Apr-1987	Add support for volumes (used in master
 *					  index processing).
 * 2-004	RLA	14-Apr-1987	Add new page highlight flag
 * 2-005	RLA	15-Apr-1987	Add support for cross-referencing
 * 3-001	F.H.	17-May-1991	converted to portable C
 */
/*
 * Module IdxBuild - Module-Wide Data Description Section
 *
 * Include Files:
 */
#ifdef MSDOS
#include <stdlib.h>
#include <io.h>
#define F_OK		0	/* access(): File exists */
#else
#	include <sys/file.h>
extern char *sprintf();
#endif
#include <string.h>
#include <stdio.h>
#include <malloc.h>
#include <ctype.h>
#include "IdxDef.h"
/*
 * Module Definitions:
 */
#define	    TRUE	1
#define	    FALSE	0
#define	    linebfsize	133
/*
 * Global Declarations:
 */
/*
 * Static Declarations:
 */
static char   *page_ref;
static char   *tok_spell;
static char   see_spell[linebfsize];
#ifdef MSDOS
void idx_build_tree(char *token_1, char *token_2, char *token_3,
	char *page_no, char token_ct, char *vol, int flag);
TREE_PTR idx_locate_node(TREE_PTR *head, char *token, char *desc);
void idx_add_page_ref(PGNODE_PTR *pghead, char *token,
	char highlight, char *vol);
void idx_add_cross_ref(PGNODE_PTR *seehead, char *token,
	char highlight, char *vol);
TREE_PTR idx_allocate_node(char *token, char *desc);
PGNODE_PTR idx_allocate_pgnode(char *vol, char *dsc, char hlite);
#else
void idx_build_tree();
TREE_PTR idx_locate_node();
void idx_add_page_ref();
void idx_add_cross_ref();
TREE_PTR idx_allocate_node();
PGNODE_PTR idx_allocate_pgnode();
#endif
/*
 * External References:
 */
extern TREE_PTR	root;	/* root of the Index Tree */
#ifdef MSDOS
extern void idx_build_spell_string(char *desc);
extern void idx_parse(char *linebf, char *token_1, char *token_2,
	char *token_3, char *page_no, int *token_ct, int *flag);
#else
extern void idx_build_spell_string();
extern void idx_parse();
#endif
/*
 * Functions Called:
 */
/*
 * Function Idx_Build_Tree - Documentation Section
 *
 * Discussion:
 *	This routine uses the information parsed by Idx_Parse to build the Index
 *	Tree.
 *
 * Calling Synopsis:
 *	Call Idx_Build_Tree (token_1, token_2, token_3, page_no, token_ct, 
 *								    vol, flag)
 *
 * Inputs:
 *	token_1	    ->	is the top level item token.  ASCIZ string passed by
 *			reference.
 *
 *	token_2	    ->	is the second level item token.  ASCIZ string passed by
 *			reference.
 *
 *	token_3	    ->	is the third level item token.  ASCIZ string passed by
 *			reference.
 *
 *	page_no	    ->	is the page number token.  ASCIZ string passed by 
 *			reference.
 *
 *	token_ct    ->	is the number of tokens seen for this index entry.
 *			integer passed by value.
 *
 *	vol	    ->	is a pointer to the string descriptor for a volume 
 *			string.  The pointer is passed by value.
 *
 *	flag	    ->	is a boolean flag.  If TRUE, the page_no variable is
 *			really a cross-reference string.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	The Index Tree is updated.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	Tree is updated.
 *
 * Error Conditions:
 *	The only expected error is a memory allocation failure.  In this case,
 *	a message is printed and the entry is ignored.
 *
 * Algorithm:
 *	A. Check for a page-no highlighting flag.
 *	B. Check for a valid set of input data.  Ignore if not.
 *	C. Build a Spell String for the top-level item.
 *	D. Locate the top-level item in the tree.
 *	    1. If it does not exist,
 *		A. Allocate a new node and link it in.
 *	E. If token_ct == 1,
 *	    1. Add page number to list.
 *	F. Else,
 *	    1. Build a Spell String for the second-level item.
 *	    2. Locate the second-level item in the tree.
 *		A. If it does not exist,
 *		    1. Allocate a new node and link it in.
 *	    3. If token_ct == 2,
 *		A. Add page number to list.
 *	    4. Else,
 *		A. Build a Spell String for the third-level item.
 *		B. Locate the third-level item in the tree. 
 *		    1. If it does not exist,
 *			a. Allocate a new node and link it in.
 *		C. Add page number to list.
 *
 * Special Notes:
 *	The first character of token_1 may be a special character.  Special
 *	characters are used to indicate special highlighting to be performed
 *	on the page-no for this index entry.  The special characters are:
 *	    ^	use boldface
 *	    ~	use italic
 *	    _	use underlining
 *	Only one type of highlighting is supported for each page entry.
 */
/*
 * Function Idx_Build_Tree - Code Section
 */
void idx_build_tree (token_1, token_2, token_3, page_no, token_ct, vol, flag)
     char   *token_1;
     char   *token_2;
     char   *token_3;
     char   *page_no;
     char   token_ct;
     char   *vol;
     int    flag;
{
/*
 * Local Declarations
 */
  char		    spell_str[linebfsize];
  TREE_PTR	    node;
  int		    test_char;
  char		    *real_token_1;
  char		    highlight;
/*
 * Module Body
 */
/* Check for page-no highlighting flag */
  if (token_ct == 0) return;
  if (token_1[0] == '\0') return;
  real_token_1 = &token_1[1];
  test_char = token_1[0];
  switch (test_char) {
  case '_' :
    highlight = IDX_K_UNDERLINE;
    break;
  case '~' :
    highlight = IDX_K_ITALIC;
    break;
  case '^' :
    highlight = IDX_K_BOLD;
    break;
  case '#' :
    highlight = IDX_K_FOLLOW;
    break;
  default  :
    highlight = IDX_K_NONE;
    real_token_1 = &token_1[0];
    break;
  }
/* Check out the item for validity */
  if (real_token_1[0] == '\0') return;
  if ((token_ct > 1) && (token_2[0] == '\0')) return;
  if ((token_ct > 2) && (token_3[0] == '\0')) return;
  if (token_ct > 3) return;
  if (page_no[0] == '\0') return;
/* First level token processing	*/
  (void)strcpy(spell_str, real_token_1);
  idx_build_spell_string (spell_str);
  node = idx_locate_node (&root, real_token_1, spell_str);
  if (node == 0) {
    (void)printf("Error processing index item %s\n", real_token_1);
    return;
  }
  if (token_ct == 1) {
    if (!flag)
      idx_add_page_ref (&node->pghead, page_no, highlight, vol);
    else idx_add_cross_ref (&node->seehead, page_no, highlight, vol);
  }
/* Second level token processing */
  else {
    (void)strcpy(spell_str,token_2);
    idx_build_spell_string(spell_str);
    node = idx_locate_node (&node->subhead, token_2, spell_str);
    if (node == 0) {
      (void)printf("Error processing index subitem %s\n", token_2);
      return;
    }
    if (token_ct == 2) {
      if (!flag)
	idx_add_page_ref (&node->pghead, page_no, highlight, vol);
      else
	idx_add_cross_ref (&node->seehead, page_no, highlight, vol);
    }
/* Third level token processing	*/
    else {
    (void)strcpy(spell_str,token_3);
      idx_build_spell_string(spell_str);
      node = idx_locate_node (&node->subhead, token_3, spell_str);
      if (node == 0) {
	(void)printf("Error processing index subsubitem %s\n", token_3);
	return;
      }
      if (!flag)
	idx_add_page_ref (&node->pghead, page_no, highlight, vol);
      else
	idx_add_cross_ref (&node->seehead, page_no, highlight, vol);
    }
  }
}

/*
 * Function Idx_Locate_Node - Documentation Section
 *
 * Discussion:
 *	This routine scans the list of nodes (starting from the listhead),
 *	looking for one which has a spell-string identical to the given
 *	spell string.  If it doesn't find one, it adds a node in the right place
 *	in the chain to maintain alphabetical order.
 *
 * Calling Synopsis:
 *	node = Idx_Locate_Node (head, token, desc)
 *
 * Inputs:
 *	head	    ->	is the address of the listhead of the list to be
 *			scanned.  As such, it is a TREE_PTR, passed by 
 *			reference.
 *
 *	token	    ->	is the token to be located or entered in the list.
 *			ASCIZ string passed by reference.
 *
 *	desc	    ->	is the spell string, passed by descriptor.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	node	    ->	is the address of the tree leaf for the current token.
 *			It is a TREE_PTR, passed by value.  If the leaf did not
 *			already exist and if the system was unable to allocate
 *			one, the node is returned as 0.
 *
 * Global Data:
 *	The list specified by the listhead will be modified if it is not 
 *	possible to add a new leaf when necessary.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State: 
 *	An empty listhead contains 0.
 *
 * Normal Exit State:
 *	The node is returned.
 *
 * Error Conditions:
 *	Allocation failure -- node = 0.
 *
 * Algorithm:
 *	A. If the list is empty,
 *	    1. Allocate a node and link it in.
 *	B. Else,
 *	    1. Beginning with the first node,
 *		a. If spell-string < node-spell-string,
 *		    1. Get next node.
 *		b. If spell-string = node-spell-string,
 *		    1. Return node pointer.
 *		c. If spell-string > node-spell-string,
 *		    1. Allocate a node and link it in.
 *
 * Special Notes:
 *	Although this algorithm isn't a very efficient way to alphatize things,
 *	it has the tremendous virtue of being easy to implement.  Since the
 *	program is not run often (typically a couple of times per document),
 *	the inefficiency is not important.
 */
/*
 * Function Idx_Locate_Node - Code Section
 */
TREE_PTR idx_locate_node (head, token, desc)
     TREE_PTR	*head;
     char	*token;
     char	*desc;
{
/*
 * Local Declarations
 */
  TREE_PTR	node;
  TREE_PTR	old_node;
  TREE_PTR	new_node;
  int		status;
/*
 * Module Body
 */
/* Chain through the list, looking for the right place */
  old_node = 0;
  new_node = *head;
  while (new_node != 0) {
    status = strcmp(new_node->spell, desc);
    if (status < 0) {
      old_node = new_node;
      new_node = new_node->link;
      continue;
    }
    if (status == 0) return (new_node);
    if (status > 0) {
      node = idx_allocate_node (token, desc);
      if (old_node == 0) *head = node;
      else old_node->link = node;
      node->link = new_node;
      return (node);
    }
  }
/* If the list is exhausted, allocate a node immediately */
  node = idx_allocate_node (token, desc);
  if (old_node == 0) *head = node;
  else old_node->link = node;
  return (node);
}

/*
 * Function Idx_Allocate_Node - Documentation Section
 *
 * Discussion:
 *	Allocate and initialize a leaf of the Index Tree.
 *
 * Calling Synopsis:
 *	node = Idx_Allocate_Node (token, desc)
 *
 * Inputs:
 *	token	    ->	token string.  ASCIZ string passed by reference.
 *
 *	desc	    ->	spell string.  Character string passed by descriptor.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	node	    ->	Address of the allocated node.  TREE_PTR passed by 
 *			value.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	node is allocated.
 *
 * Error Conditions:
 *	allocation failure -- node is returned == 0.
 *
 * Algorithm:
 *	A. Allocate virtual memory for the node.
 *	B. Initialize the node structure.
 *
 * Special Notes:
 *	none
 */
/*
 * Function Idx_Allocate_Node - Code Section
 */
TREE_PTR idx_allocate_node (token, desc)
     char    *token;
     char    *desc;
{
/*
 * Local Declarations
 */
  TREE_PTR  node;
/*
 * Module Body
 */
  node = (TREE_PTR) malloc (sizeof(TREE));
  if (node != 0) {
    node->link    = 0;
    node->subhead = 0;
    node->pghead  = 0;
    node->seehead = 0;
    node->spell = strdup(desc);
    node->item = strdup(token);
  }
  return (node);
}

/*
 * Function Idx_Add_Page_Ref - Documentation Section
 *
 * Discussion:
 *	If the specified page reference is not already in the current list,
 *	add it to the end of the list.
 *
 * Calling Synopsis:
 *	Call Idx_Add_Page_Ref (pghead, token, highlight, vol)
 *
 * Inputs:
 *	pghead	    ->	the address of the listhead for the page reference list.
 *			a PGNODE_PTR, passed by reference.
 *
 *	token	    ->	the page reference.  ASCIZ string passed by reference.
 *
 *	highlight   ->	a flag indicating what sort of highlighting to give
 *			this particular page reference.  char passed by value.
 *
 *	vol	    ->	is a pointer to a character string descriptor which
 *			describes the volume string.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	If a new page reference needs to be added, the list is updated.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	If the page reference already exists, nothing happens.  If a new
 *	page reference needs to be added, it is allocated and appended to the
 *	end of the list.
 *
 * Error Conditions:
 *	If there is an allocation failure, the page reference is not added.
 *
 * Algorithm:
 *	A. Put the page_no token in a dynamic string. 
 *	B. For all nodes in the list,
 *	    1. If the page-no in the list does not match the page-no,
 *		a. Get next node.
 *	    2. Else,
 *		a. Return.
 *	C. If the list is exhausted,
 *	    1. Allocate a new node.
 *	    2. Fill it in.
 *	    3. Chain it to the end of the list.
 *
 * Special Notes:
 *	This routine assumes that page-no references are always in numerical
 *	order.  This assumption is necessary, because the variety of page
 *	numbering (perhaps within the document) makes it impossible to check
 *	for order.
 */
/*
 * Function Idx_Add_Page_Ref - Code Section
 */
void idx_add_page_ref (pghead, token, highlight, vol)
     PGNODE_PTR	    *pghead;
     char	    *token;
     char	    highlight;
     char           *vol;
{
/*
 * Local Declarations
 */
  PGNODE_PTR    node;
  PGNODE_PTR    last_node = *pghead;
/*
 * Module Body
 */
  page_ref = strdup(token);
  while (last_node != 0) {
    if ((strcmp(last_node->page_dsc, page_ref) == 0) &&
	((strcmp(last_node->vol, vol)) == 0)) {
      if (highlight == IDX_K_NONE) return;
      last_node->highlight = highlight;
      return;
    }
    if (last_node->link == 0) break;
    last_node = last_node->link;
  }
  node = idx_allocate_pgnode (vol, page_ref, highlight);
  if (node != 0) {
    if (last_node == 0) *pghead = node;
    else last_node->link = node;
  }
}

/*
 * Function Idx_Add_Cross_Ref - Documentation Section
 *
 * Discussion:
 *	Parse and build a cross reference string.  If the string is not already
 *	on the current list, add it to the list in alphabetical order.
 *
 * Calling Synopsis:
 *	Call Idx_Add_Cross_Ref (seehead, token, highlight, vol)
 *
 * Inputs:
 *	seehead	    ->	the address of the listhead for the cross-reference
 *			list.
 *
 *	token	    ->	the cross-reference string.  ASCIZ string passed by
 *			reference.
 *
 *	hightlight  ->	a flag indicating what sort of highlighting to give"
 *			this particular cross-reference.  char passed by value.
 *
 *	vol	    ->	is a pointer to a character string descriptor which 
 *			describes the volume string.f
 *
 * Outputs:e
 *	noneM
 *
 * Return Value:
 *	none 
 *
 * Global Data:a
 *	If a new cross-reference needs to be added, the list is updated.t
 *
 * Files Used:
 *	nonea
 *
 * Assumed Entry State:
 *	nonef
 *
 * Normal Exit State:s
 *	If the cross-reference already exists, nothing happens.  If a new
 *	page reference needs to be added, it is allocated and added to thei
 *	list.
 *
 * Error Conditions:
 *	If there is an allocation failure, the page reference is not added.
 *
 * Algorithm:
 *	A. Parse the page reference into tokens.
 *	B. Build the cross-reference text string and put it in a dynamic string. 
 *	C. Build a spell string for it.
 *	D. Chain the string into the list. 
 *
 * Special Notes:*
 *	none*
 */
/*
 * Function Idx_Add_Cross_Ref - Code Section
 */
void idx_add_cross_ref (seehead, token, highlight, vol)
     PGNODE_PTR	    *seehead;
     char	    *token;
     char	    highlight;
     char           *vol;
{
/*
 * Local Declarationsp
 */
  int		length;
  PGNODE_PTR    node;
  PGNODE_PTR    old_node = 0;
  PGNODE_PTR    new_node = *seehead;
  char		tok_1[linebfsize];
  char		tok_2[linebfsize];
  char		tok_3[linebfsize];
  char		dummy[linebfsize];
  int		tok_ct;
  int		flag;
/*
 * Module Body
 */

/*
 * Parse the cross-reference string into its tokens using Idx_Parse.
 * Then combine the results to build a token string and write it to thei
 * page_ref dynamic string.  Compute its spell string.
 */
  idx_parse(token, tok_1, tok_2, tok_3, dummy, &tok_ct, &flag);
  if (tok_ct == 0) return;
  if (strlen(tok_1) == 0) return;	
  if (tok_ct > 1 && strlen(tok_2) == 0) return;
  if (tok_ct > 2 && strlen(tok_3) == 0) return;
  length = strlen(tok_1);
  (void)sprintf (dummy, "%s", tok_1);
  if (tok_ct > 1) {
    (void)sprintf (&dummy[length], ", %s", tok_2);
    length += strlen(tok_2);
  }
  if (tok_ct > 2) {
    (void)sprintf(&dummy[length], ", %s", tok_3);
    length += strlen(tok_3);
  }
  page_ref = strdup(dummy);
  tok_spell = dummy;
  idx_build_spell_string(tok_spell);
/*
 * Now scan the list of cross-references to find where to put this one.C
 */
  while (new_node != 0) {
    (void)strncpy(dummy, new_node->page_dsc,
		  strlen(new_node->page_dsc));
    dummy[strlen(new_node->page_dsc)] = '\0';
    (void)strcpy(see_spell, dummy);
    idx_build_spell_string(see_spell);
    flag = strcmp(see_spell, tok_spell);
    if (flag < 0) {
      old_node = new_node;
      new_node = new_node->link;
      continue;
    }
    if (flag == 0) return;
    if (flag > 0) {
      node = idx_allocate_pgnode(vol, page_ref, highlight);
      if (node != 0) {
	if (old_node == 0) *seehead = node;
	else old_node->link = node;
	node->link = new_node;
      }
      return;
    }
  }

/* If the list is exhausted, allocate a node immediately */
  node = idx_allocate_pgnode(vol, page_ref, highlight);
  if (node != 0) {
    if (old_node == 0) *seehead = node;
    else old_node->link = node;
  }
}

/*
 * Function Idx_Allocate_PgNode - Documentation Sectioni
 *
 * Discussion:
 *	Allocate and initialize a new page node.o
 *
 * Calling Synopsis:
 *	node = Idx_Allocate_PgNode (vol, dsc, hlite) 
 *
 * Inputs:
 *	vol	    ->	is the address of the volume string descriptor.
 *
 *	dsc	    ->	is the address of the page ref string descriptor.
 *
 *	hlite	    ->	is the highlight flag
 *
 * Outputs:
 *	nonem
 *
 * Return Value:
 *	node	    ->	is the address of the newly allocated node.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State: 
 *	none
 *
 * Error Conditions:
 *	if the allocation failed, node is returned = 0.
 *
 * Algorithm: 
 *	A. Allocate the node.
 *	B. If successful,
 *	    1. Initialize it.
 *
 * Special Notes:
 *	none
 */
/*
 * Function Idx_Allocate_PgNode - Code Section
 */
PGNODE_PTR idx_allocate_pgnode (vol, dsc, hlite)
     char   *vol;
     char   *dsc;
     char   hlite;
{
/*
 * Local Declarations
 */
  PGNODE_PTR    node;
/*
 * Module Body
 */
  node = (PGNODE_PTR) malloc (sizeof(PGNODE));
  if (node != 0) { 
    node->link = 0;
    node->vol = strdup(vol);
    node->highlight = hlite;
    node->page_dsc = strdup(dsc);
  }
  return (node);
}
