/*
   Simple program to check LaTeX code for matching parentheses, etc.

   For installation & usage, see README and run 'check-parens --help'.

   TODO(?):
   - handling of empty '\right.' delimiter is hacky.
   - delimiter size detection is a bit a hack: '\big{(}' doesn't work.
   - false positives for special constructs like:
     '\newenvironment{\foo}{\begin{bar}}{\end{bar}}'


   Copyright (C) 2012--2015  Jaap Eldering (jaap@jaapeldering.nl).

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

 */

#include <cstdio>
#include <cstring>
#include <cctype>
#include <cstdlib>
#include <cstdarg>
#include <getopt.h>
#include <errno.h>

#include <string>
#include <vector>
#include <map>
#include <set>
#include <sstream>
#include <fstream>
#include <iostream>

#include "check-parens-defconfig.hxx"

using namespace std;

#define PROGRAM "check-parens"
#define VERSION "2015.07.24"
#define AUTHORS "Jaap Eldering"

const int EXIT_NONFATAL = 1;
const int EXIT_FATAL = 2;

const string defconfigfilename = ".check-parens.conf";

enum paren_type { paren_hard, paren_soft, paren_env };

char *progname;
char *filename;
FILE *texfile;

int ignore_soft;
int ignore_size;
int file_line_error;
int debug_level;
int show_help;
int show_version;

struct option const long_opts[] = {
	{"ignore-soft",        no_argument,       &ignore_soft,      1 },
	{"ignore-size",        no_argument,       &ignore_size,      1 },
	{"file-line-error",    no_argument,       &file_line_error,  1 },
	{"no-file-line-error", no_argument,       &file_line_error,  0 },
	{"config-file",        required_argument, NULL,             'c'},
	{"debug",              no_argument,       NULL,             'd'},
	{"help",               no_argument,       &show_help,        1 },
	{"version",            no_argument,       &show_version,     1 },
	{ NULL,                0,                 NULL,              0 }
};

void version()
{
	printf("\
%s -- version %s\n\
Written by %s\n\n\
%s comes with ABSOLUTELY NO WARRANTY. This is free software, and\n\
you are welcome to redistribute it under the GNU General Public Licence\n\
as published by the Free Software Foundation; either version 3, or (at your\n\
option) any later version.\n",PROGRAM,VERSION,AUTHORS,PROGRAM);
	exit(0);
}

void usage()
{
	printf("\
Usage: %s [option]... <latex-file>...\n\
Check LaTeX source <latex-file> for non-matching parentheses.\n\
The exitcode is the number of files with fatal errors.\n\
\n\
  -i  --ignore-soft         ignore mismatch of \"soft\" parentheses that\n\
                              do not break LaTeX syntax\n\
  -s  --ignore-size         ignore mismatch of delimiter sizes\n\
      --file-line-error     format mismatch messages as 'file:line:char:error'\n\
                              just as many compilers do (default)\n\
      --no-file-line-error  print messages in more human readable format\n\
  -c  --config-file=FILE    read configuration from FILE (default: ~/%s)\n\
  -d  --debug               add debugging output, repeat for more verbosity\n\
      --help                display this help and exit\n\
      --version             output version information and exit\n",
	       progname, defconfigfilename.c_str());
	exit(0);
}

void error(int errnum, const char *format, ...)
{
	va_list ap;
	va_start(ap,format);

	fprintf(stderr,"%s",progname);

	if ( format!=NULL ) {
		fprintf(stderr,": ");
		vfprintf(stderr,format,ap);
	}
	if ( errnum!=0 ) {
		fprintf(stderr,": %s",strerror(errnum));
	}
	if ( format==NULL && errnum==0 ) {
		fprintf(stderr,": unknown error");
	}

	fprintf(stderr,"\nTry `%s --help' for more information.\n",progname);
	va_end(ap);

	exit(-1);
}

void debug(int level, const char *format, ...)
{
	va_list ap;

	if ( debug_level<level ) return;

	va_start(ap,format);

	fprintf(stderr,"debug: ");
	vfprintf(stderr,format,ap);

	va_end(ap);
}

size_t line = 1, pos = 0;
size_t currline = 0, currpos = 0;

struct paren {
	string open, close, size;
	paren_type type;
	size_t line, pos;

	paren() {}

	paren(string _open, string _close, paren_type _type=paren_hard):
		open(_open), close(_close), type(_type), line(currline), pos(currpos) {}

	paren(const paren& p, string _size):
		open(p.open), close(p.close), size(_size), type(p.type),
		line(currline), pos(currpos) {}
};

vector<paren> paren_list;
map<string,string> delim_list;
map<string,paren> paren_open, paren_close;

struct paren_stack {
	vector<paren> data;
	int lasthard;

	paren_stack(): data(), lasthard(-1) {}

	bool  empty() { return data.empty(); }
	size_t size() { return data.size(); }

	void push(const paren& p)
	{
		if ( !p.type==paren_soft ) lasthard = data.size();
		data.push_back(p);
	}

	const paren& top()
	{
		return data.back();
	}

	void pop()
	{
		data.pop_back();
		while ( lasthard>=(int)data.size() ||
		        (lasthard>=0 && data[lasthard].type==paren_soft) ) lasthard--;
	}

	// Check if argument is at top (or highest "hard" parenthesis)
	int attop(const string& s, bool soft)
	{
		if ( soft ) {
			return data.size()>0 && s==data.back().close;
		} else {
			return lasthard>=0 && s==data[lasthard].close;
		}
	}

	// Remove irrelevant elements from stack, e.g. before printing it.
	void clean()
	{
		for(int i=data.size()-1; i>=0; --i) {
			paren& p = data[i];
			if ( (ignore_soft && p.type==paren_soft) ) {
				data.erase(data.begin()+i);
			}
		}
	}
};

int readchar()
{
	int c = getc(texfile);
	pos++;
	if ( c==EOF ) return EOF;
	if ( c=='\n' ) { line++; pos = 0; }
	return c;
}

int unreadchar(int c)
{
	if ( c=='\n' ) { line--; pos = 0; }
	pos--;
	return ungetc(c,texfile);
}

void message(size_t line, size_t pos, const char *format, ...)
{
	va_list ap;
	va_start(ap,format);

	if ( file_line_error ) {
		if ( line!=0 ) printf("%s:%zu:%zu: ",filename,line,pos);
	} else {
		if ( line!=0 ) printf("Line %zu, char %zu: ",line,pos);
	}

	vprintf(format,ap);

	printf("\n");

	va_end(ap);
}

void readconfig(istream& conf)
{
	string line;

	while ( getline(conf,line) && !conf.fail() ) {
		istringstream ss(line);
		string command, par_open, par_close, delim, token;
		if ( !(ss >> command) || command.substr(0,1)=="#" ) continue;
		if ( command=="paren" ) {
			ss >> token;
			if ( token!="=" ) error(0,"expected `=' after `%s' command",command.c_str());
			if ( ! (ss >> par_open >> par_close) ) error(0,"expected paren open and close");
			paren_type type = paren_hard;
			if ( ss >> token && token=="soft" ) type = paren_soft;
			paren_list.push_back(paren(par_open,par_close,type));
		} else if ( command=="delimsize" ) {
			ss >> token;
			if ( token!="=" ) error(0,"expected `=' after `%s' command",command.c_str());
			if ( ! (ss >> delim) ) error(0,"expected a paren delimiter");
			delim_list[delim] = delim;
			while ( ss >> token ) delim_list[token] = delim;
		} else {
			error(0,"found unknown command `%s' in configuration file",command.c_str());
		}
	}
}


int readtoken(string &tok, bool verbatim = false)
{
	int c;

	if ( (c=readchar())==EOF ) return 0;
	currline = line;
	currpos  = pos;

	tok = string(1,c);
	// Gobble comments until end of line
	if ( !verbatim && c=='%' ) {
		while ( c!=EOF && c!='\n' ) c = readchar();
	}
	if ( c!='\\' ) return 1;

	if ( (c=readchar())==EOF ) return 1;
	tok += c;
	if ( !isalpha(c) ) return 1;

	while ( (c=readchar())!=EOF ) {
		if ( isalpha(c) ) {
			tok += c;
		} else {
			unreadchar(c);
			break;
		}
	}

	if ( (tok=="\\begin" || tok=="\\end") && c=='{' ) {
		size_t saveline = currline;
		size_t savepos  = currpos;
		tok += readchar();

		string tok2;
		while ( readtoken(tok2)!=EOF && tok2!="}" ) tok += tok2;
		if ( tok2!="}" ) {
			message(saveline,savepos,"unclosed environment delimiter.");
			return 0;
		}
		tok += tok2;
		currline = saveline;
		currpos  = savepos;
	}

	return 1;
}

void printstack(paren_stack stk)
{
	message(0,0,"Parentheses stack contains %zu elements:",stk.size());
	while ( !stk.empty() ) {
		paren p = stk.top(); stk.pop();
		message(p.line,p.pos,"'%s' (requires closing '%s').",
		        p.open.c_str(),p.close.c_str());
	}
}

// Returns 0 on success, or EXIT_NONFATAL or EXIT_FATAL.
int checkfile(char * const fname)
{
	string tok, delimsize, delimnext;
	paren_stack stk;
	bool soft_paren;

	bool nonfatal_error = false;

	filename = fname;

	texfile = fopen(filename,"r");
	if ( texfile==NULL ) error(errno,"opening file '%s'",filename);

	line = 1, pos = 0;
	currline = 0, currpos = 0;

	debug(1,"starting to check file '%s'.\n",fname);

	while ( readtoken(tok) ) {
		debug(2,"%zu:%zu tok = '%s'\n",currline,currpos,tok.c_str());

		// (Re)set parenthesis delimiter size:
		delimsize = delimnext;
		delimnext = ( delim_list.count(tok) ? tok : "" );

		// Gobble whitespace after delimiter size tokens
		if ( delimsize!="" && isspace(tok[0]) ) {
			delimnext = delimsize;
			continue;
		}

		// Skip over \verb and verbatim environments:
		if ( tok=="\\verb" ) {
			size_t saveline = currline;
			size_t savepos  = currpos;
			char c, endchar = readchar();
			if ( endchar==EOF ) {
				message(currline,currpos,"Unexpected end of file.");
				printstack(stk);
				return EXIT_FATAL;
			}
			debug(1,"%zu:%zu found '%s', searching for end character '%c'\n",
			      saveline,savepos,tok.c_str(),endchar);
			do {
				c = readchar();
				if ( c==EOF ) {
					message(currline,currpos,"Unexpected end of file.");
					printstack(stk);
					return EXIT_FATAL;
				}
			} while ( c!=endchar );
			debug(1,"%zu:%zu found verbatim end '%c'\n",currline,currpos,c);
			continue;
		}
		if ( tok=="\\begin{verbatim}" ) {
			string endtok = "\\end"+tok.substr(6);
			debug(1,"%zu:%zu found '%s', searching for end token '%s'\n",
			      currline,currpos,tok.c_str(),endtok.c_str());
			do { readtoken(tok,true); } while ( tok!=endtok );
			debug(1,"%zu:%zu found verbatim end '%s'\n",
			      currline,currpos,tok.c_str());
			continue;
		}

		// First try to pop token from stack, then push;
		// then matching of mathmode $$'s works out-of-the-box.
		soft_paren = false;
		if ( paren_close.count(tok) ) soft_paren = (paren_close[tok].type==paren_soft);
		if ( !stk.empty() && stk.attop(tok,soft_paren) ) {
			// Print warnings for any soft parentheses dropped:
			paren p = stk.top(); stk.pop();
			while ( p.close!=tok ) {
				if ( !ignore_soft ) {
					message(p.line,p.pos,
					        "closing '%s' at %zu:%zu does not match opening '%s'.",
					        tok.c_str(),currline,currpos,p.open.c_str());
					nonfatal_error = true;
				}
				p = stk.top(); stk.pop();
			}
			debug(1,"pop  '%s' %s %s\n",tok.c_str(),p.size.c_str(),delimsize.c_str());
			// Check for same delimiter size:
			if ( !ignore_size && delim_list[p.size]!=delim_list[delimsize] ) {
				message(currline,currpos,
				        "size '%s' of '%s' does not match "
				        "opening size '%s' at %zu:%zu.",
				        delimsize.c_str(),tok.c_str(),
				        p.size.c_str(),p.line,p.pos);
				nonfatal_error = true;
			}
			continue;
		}
		// Special case closing '\right.'
		if ( !stk.empty() ) {
			paren p = stk.top();
			if ( tok=="." && delimsize=="\\right" &&
			     p.type==paren_soft && delim_list[p.size]=="\\left" ) {
				stk.pop();
				continue;
			}
		}
		// Now push new open tokens onto the stack:
		if ( paren_open.count(tok) ) {
			debug(1,"push '%s' %s\n",tok.c_str(),delimsize.c_str());
			stk.push(paren(paren_open[tok],delimsize));
			continue;
		}
		if ( tok.substr(0,7)=="\\begin{" ) {
			debug(1,"push '%s'\n",tok.c_str());
			stk.push(paren(tok,"\\end"+tok.substr(6)));
			continue;
		}
		// Finally, any stack-able token now means a close error:
		if ( paren_close.count(tok) || tok.substr(0,5)=="\\end{" ) {
			soft_paren = false;
			if ( paren_close.count(tok) ) soft_paren = (paren_close[tok].type==paren_soft);
			if ( !soft_paren || !ignore_soft ) {
				if ( stk.empty() ) {
					message(currline,currpos,
					        "closing '%s' without matching open.",
					        tok.c_str());
					nonfatal_error = true;
				} else {
					paren p = stk.top();
					message(p.line,p.pos,
					        "closing '%s' at %zu:%zu does not match opening '%s'.",
					        tok.c_str(),currline,currpos,p.open.c_str());
					nonfatal_error = true;
				}
			}
			if ( !soft_paren ) {
				message(0,0,"Aborting after fatal mismatch in file '%s'.",filename);
				printstack(stk);
				return EXIT_FATAL;
			}
		}
	}

	debug(1,"finished checking file '%s', checking stack.\n",fname);

	stk.clean();
	if ( !stk.empty() ) {
		message(0,0,"Unmatched elements at end of file '%s'.",filename);
		printstack(stk);
		return EXIT_FATAL;
	}

	if ( fclose(texfile)!=0 ) error(errno,"closing file '%s'",filename);

	if ( nonfatal_error ) return EXIT_NONFATAL;
	return 0;
}

int main(int argc, char **argv)
{
	string configfilename;
	int opt, res;

	progname = argv[0];

	/* Parse command-line options */
	ignore_soft = ignore_size = debug_level = 0;
	file_line_error = 1;
	show_help = show_version = 0;
	opterr = 0;
	while ( (opt = getopt_long(argc,argv,"+isc:d",long_opts,(int *) 0))!=-1 ) {
		switch ( opt ) {
		case 0:   /* long-only option */
			break;
		case 'i':
			ignore_soft = 1;
			break;
		case 's':
			ignore_size = 1;
			break;
		case 'c':
			configfilename = string(optarg);
			break;
		case 'd':
			debug_level++;
			break;
		case ':': /* getopt error */
		case '?':
			error(0,"unknown option or missing argument `%c'",optopt);
			break;
		default:
			error(0,"getopt returned character code `%c' ??",(char)opt);
		}
	}

	if ( show_help ) usage();
	if ( show_version ) version();

	if ( argc<=optind ) error(0,"no file(s) specified");

	// Read configuration from default file if it exists:
	if ( configfilename=="" && getenv("HOME")!=NULL ) {
		configfilename = string(getenv("HOME")) + "/" + defconfigfilename;
		ifstream test(configfilename.c_str());
		if ( !test.good() ) configfilename = "";
	}

	// For some reason (at least my) GCC doesn't support std::string argument?
	ifstream conffile(configfilename.c_str());
	if ( configfilename!="" && !conffile.good() ) {
		error(errno,"cannot open configuration file `%s'",configfilename.c_str());
	}

	if ( configfilename!="" ) {
		readconfig(conffile);
		debug(1,"using configuration file = '%s'\n",configfilename.c_str());
	} else {
		istringstream confstr(defconfigstring);
		readconfig(confstr);
		debug(1,"using internal configuration\n");
	}

	// Initialize map for opening/closing parentheses:
	for(size_t i=0; i<paren_list.size(); ++i) {
		paren_open [paren_list[i].open]  = paren_list[i];
		paren_close[paren_list[i].close] = paren_list[i];
	}

	int error = 0;
	for(res=0; optind<argc; ++optind) {
		// Print empty/filename line between output for different files:
		if ( file_line_error ) {
			if ( error>0 ) printf("\n");
		} else {
			printf("In file %s:\n",argv[optind]);
		}
		error = checkfile(argv[optind]);
		if ( error>=2 ) res++;
		if ( !file_line_error ) printf("\n");
	}

	return res;
}
