% Copyright 2012-2020, Alexander Shibakov
% This file is part of SPLinT
%
% SPLinT is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% SPLinT is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with SPLinT.  If not, see <http://www.gnu.org/licenses/>.

%\def\mkpurebyte{\uccode`\@=\yycp@\uppercase{\yybytepure{@}}\uccode`\@=`\@}

% make all symbol characters category 12; most macros are indifferent to this
% however, if delimited macros are used to process the matched text, the option 
% makes it easier to write such macros;
%
% \mkpurebyte stores in \yybytepure a single character token whose character
% code is the current value of \yycp@: the \uccode of `.' is temporarily set
% to \yycp@, \uppercase converts the `.' inside the assignment, and the
% \uccode of `.' is then reset to the code of `.' itself; since \uppercase
% changes character codes only, the result keeps the category code of `.';
% note that \uppercase leaves a character unchanged when its \uccode is 0,
% so a value of 0 in \yycp@ produces a literal `.'

\def\mkpurebyte{\uccode`\.=\yycp@\uppercase{\yybytepure{.}}\uccode`\.=`\.}

% entry point of the input routine: \futurelet makes \next equal to the
% upcoming input token (without removing it from the input) and then
% runs \yyinp@t, which decides how that token is to be consumed
\def\yyinput{\futurelet\next\yyinp@t} % get the code of the next character ...

% \yyinp@t examines \next (set by \yyinput) and selects the macro that consumes
% the upcoming token:
%   same category code as \bgroup      -> \yyinputgroup (takes the whole group);
%   same character code as \space      -> \yycp@=32, \yybyte={ }, \mkpurebyte,
%                                         then \yyskipspace removes the token;
%   same character code as \eolletter  -> \yycp@=\n, \yybyte={\n}, \mkpurebyte,
%                                         then \yyskipspace removes the token;
%   anything else                      -> \yy@np@t (absorbs the token as an argument).
% \yybreak, \yybreak@, \yybreak@@, \yycontinue and \eolletter are defined elsewhere;
% presumably the \yybreak... family closes the conditionals that are still open
% before running the token that follows it (confirm against their definitions).
\def\yyinp@t{% test the code and decide whether to continue lexing the
             % token or return to the parser
  \ifcat\noexpand\next\bgroup 
      \yybreak\yyinputgroup
  \else
      \if\noexpand\next\space % code 32 character token is assumed to
% be an ordinary space (while category 10 tokens are not necessarily);
% the reason for this choice is the way code 32 characters are treated
% by \string (turned into `real' spaces); `funny' spaces of both kinds
% (a character that differs from a `real' space (\catcode`\ ==10
% \number`\ ==32) in either category or character code) can be
% created, however, category 10 characters of character code other
% than 32 are very rare (and take some effort to produce), while
% category 12, character code 32 tokens are produced as a result of
% `sanitizing' input all the time; note that the spaces are processed
% at the lowest level of the input routine and thus cannot be
% substituted by the switches below; this choice is not bullet proof
% but should suffice for most uses; in the case of more polluted
% input, an extra `sanitation' step can be performed first; if
% desired, category 10 characters can be assumed to be spaces at this
% point, as well, although this will introduce an extra test, affecting
% the (already poor) efficiency of the input macros.
          \yycp@=`\ % 
          \yybyte={ }%
          \mkpurebyte
          \yybreak@\yyskipspace % return the space
      \else
          \if\noexpand\next\eolletter % cannot use ^^M here since TeX will simply drop the rest of the line
              \yycp@=\n
              \yybyte={\n}%
              \mkpurebyte
              \yybreak@@\yyskipspace % return the end of line character
          \else
              \yybreak@@\yy@np@t
          \fi
      \fi
  \yycontinue
}

% some \Cee\ escape characters; the rest are either silly (like \a and \b) or
% are already defined to have other important functions in \TeX\ (such as \v and \t)

% character codes of the supported escape characters
\chardef\n=`\^^J
\chardef\r=`\^^M
\chardef\f=`\^^L
\chardef\HT=`\^^I % ASCII horizontal tab
% pseudo character codes assigned to \yycp@ for tokens that are not ordinary
% characters: \charseq is used for multi-character control sequences (see
% \multichardefault), \charac for active characters (see \yy@np@t), and
% \chargroup for brace delimited groups (see \yyinputgroup)
\chardef\charseq1
\chardef\charac2
\chardef\chargroup3

% remove the upcoming token from the input by assigning it to \next and run
% \yyreturn once the assignment is complete; the `= ' (equals sign and one
% space) uses up the optional space permitted by the syntax of \let so that
% the token that follows is assigned even if it is itself a space
\def\yyskipspace{\afterassignment\yyreturn\let\next= }

% return a brace delimited group as a single input unit:
% #1 is the contents of the group (the braces are removed by the parameter
% scanner and put back in \yybyte); \yycp@ is set to the pseudo code
% \chargroup and \yybytepure is derived from it by \mkpurebyte
\def\yyinputgroup#1{%
  \yycp@=\chargroup 
  \yybyte={{#1}}%
  \mkpurebyte
  \yyreturn
}

% when this conditional is true, \yy@np@t and the stash processing macros
% report the tokens they handle using \immediate\write16
\newif\ifyyinputdebug

% \yy@np@t absorbs the next token (or group) as #1, saves it in \yybyte and
% classifies it:
%   the control symbol `\ '           -> \returnexplicitspace;
%   a single character                -> an active character (same category as ~)
%                                        gets the pseudo code \charac, any other
%                                        character is dispatched through \acharswitch;
%   a one-character control sequence  -> dispatched through \onecharswitch;
%   any other control sequence        -> dispatched through \multicharswitch.
% The classification relies on \ischar: `\string#1' followed by `00' sets
% \ifchar to true only when \string#1 is exactly one character long, and
% `\string#1' followed by `0' only when it is exactly two characters long
% (the escape character and a one-character name).
% \default is set to the fallback action before each \switchon (defined elsewhere).
\def\yy@np@t#1{% start lexing:
               % whitespace (category 10) tokens are skipped
               % automatically
  \yybyte{#1}%
  \ifyyinputdebug
      \immediate\write16{read: \the\yybyte\space after: \the\yytext@seen}%
  \fi
  \ifx#1\ % a space token
      \yybreak\returnexplicitspace
  \else
      \expandafter\ischar\string#100\end 
      \ifchar % a single character (not a control sequence)
          \ifcat\noexpand#1\noexpand~% is it an active character?
              \yycp@=\charac % yes, return it
              \mkpurebyte
              \yybreak@@\yyreturn
          \else % it is a non-active character ...
                % ... or a control sequence with an empty name (obtained by \csname\endcsname or
                % \endlinechar=-1 \toks0={\
                % } this case can be handled by the switch below but is ignored for the moment)
              \let\default\achardefault
              \yybreak@@{\switchon{\the\yybyte}\in\acharswitch}%
          \fi
      \else % it is a control sequence, return it
          \expandafter\ischar\string#10\end 
          \ifchar % it is a one-char control sequence
              \let\default\onechardefault
              \yybreak@@{\switchon{\the\yybyte}\in\onecharswitch}%
          \else
              \let\default\multichardefault
              \yybreak@@{\switchon{\the\yybyte}\in\multicharswitch}%
          \fi
      \fi
  \yycontinue
}

% the character code (that of the space character) reported for an explicit space
\chardef\explicitspacecode=`\ %

% action for an explicit space (`\ ' in \yy@np@t and \^^M in \onecharswitch):
% \yycp@ is set to \explicitspacecode, \yybytepure is derived from it and the
% token is returned; \yybyte is left as set by the caller
\def\returnexplicitspace{%
      \yycp@=\explicitspacecode 
      \mkpurebyte
      \yyreturn % keep looking for a non-whitespace token
}

% switch used by \yy@np@t for one-character control sequences; each entry has
% the form \raw<labels>\raw{<action>} and is interpreted by \switchon (defined
% elsewhere); \n is returned with \yycp@ set to the code of \n while \^^M is
% treated as an explicit space; all other one-character control sequences are
% handled by \onechardefault
\def\onecharswitch{
    \raw \n \raw {%
        \yycp@=\n
        \mkpurebyte
        \yyreturn
    }
    \raw \^^M\raw {%
        \returnexplicitspace
    }
}

% default action for one-character control sequences: \yybyte holds the
% control sequence \<c> so `\the\yybyte forms the character constant `\<c>
% and \yycp@ receives the character code of <c>; the token is then returned
\def\onechardefault{%
    \expandafter\yycp@\expandafter`\the\yybyte\relax
    \mkpurebyte
    \yyreturn
}%

% switch used by \yy@np@t for multi-character control sequences; each entry has
% the form \raw<labels>\raw{<action>} and is interpreted by \switchon (defined
% elsewhere); the actions fall into the following groups:
%   \vb, \insertraw, \stashed, \format, \formatlocal, \formatbegin, \formatp,
%   \sflush, \inputboundary: hand over to the macro that reads the arguments
%       of the control sequence;
%   \yyeof: return \YYENDOFBUFFERCHAR with an empty \yybytepure;
%   \flatten, \resetf, \inline, \skipheader, \fold, \breakline, \breakahead:
%       reinsert the control sequence into the input as the argument of
%       \format, \formatlocal or \formatp and restart \yyinput;
%   \break, \squashtermstrue: return a space (the latter also executes
%       \squashtermstrue);
%   \endparse, \endparseinput: raise an error (reading past the end of input).
% control sequences not listed here are handled by \multichardefault
\def\multicharswitch{
    \raw\vb\raw {%
        \vbunwrap
    }
    \raw\insertraw\raw {%
        \insertrawnext
    }
    \raw\stashed\raw {%
        \stashnext
    }
    \raw\format \formatlocal\raw {%
        \formatnext
    }
    \raw\formatbegin\raw {%
        \fmtbegin
    }
    \raw\formatp\raw {%
        \fmtparam
    }
    \raw\sflush\raw {%
        \sflushnext
    }
    \raw\yyeof\raw {%
        \yycp@=\YYENDOFBUFFERCHAR\relax
        \yybytepure={}%
        \yyreturn
    }
    \raw\inputboundary\raw {%
        \inputboundarynext
    }
    \raw\flatten \resetf \inline \skipheader\raw {%
        \expandafter\yyinput\expandafter\format\expandafter{\the\yybyte}%
    }
    \raw\fold \breakline\raw {% 
        \expandafter\yyinput\expandafter\formatlocal\expandafter{\the\yybyte}%
    }
    \raw\breakahead\raw {% 
        \expandafter\yyinput\expandafter\formatp\the\yybyte
    }
    \raw\break\raw {% for testing purposes
        \yycp@=`\ %
        \yybytepure={ }% 
        \yyreturn
    }
    \raw\squashtermstrue\raw {%
        \yycp@=`\ %
        \yybytepure={ }% 
        \squashtermstrue
        \yyreturn
    }
    \raw\endparse \endparseinput\raw {%
        \errmessage{internal error: reading past the end of the input buffer}%
    }
}

% default action for multi-character control sequences: report the pseudo
% code \charseq in \yycp@ (and, through \mkpurebyte, in \yybytepure) and return
\def\multichardefault{%
    \yycp@\charseq
    \mkpurebyte
    \yyreturn
}%

% default action for non-active single characters: \yybyte holds the character
% so `\the\yybyte forms a character constant and \yycp@ receives its code;
% \relax terminates the constant without expanding what follows
\def\achardefault{%
    \expandafter\yycp@\expandafter`\the\yybyte\relax
    \mkpurebyte
    \yyreturn
}

% switch for non-active single characters; it has no entries so every such
% character is handled by \achardefault
\def\acharswitch{}

% the following commands are only here for debugging purposes
% they slow down the input

% \setspecialcharsfrom is defined elsewhere; it is applied here to each of
% the three switches used by \yy@np@t
% NOTE(review): its exact effect on the switches cannot be determined from
% this file -- confirm against its definition
\setspecialcharsfrom\onecharswitch
\setspecialcharsfrom\multicharswitch
\setspecialcharsfrom\acharswitch

% action for \insertraw: #1 (the argument that follows \insertraw in the
% input) is executed as is and the input routine is restarted
\def\insertrawnext#1{% insert a command
    #1\yyinput
}

% action for \vb: the input has the form \vb#1#2\vb; #1 is put back into the
% input to be read by \yyinput, followed by \stashed{#2} and the closing \vb
\def\vbunwrap#1#2\vb{%
    \yyinput#1\stashed{#2}\vb
}

% character codes (that of the space character) reported in \yycp@ for stash
% and format sequences
\chardef\stashchar=`\ %
\chardef\formatchar=`\ %

% counters that supply the markers of the elements added to the stash and
% format streams
\newcount\stashmarker
\newcount\formatmarker

% stash processing that presents the stash to the lexer as a space:
% #1 is the stash contents; \yybytepure becomes a space and \yycp@ is set to
% \stashchar; {#1} is appended to \yybyte, \stashmarker is advanced, and
% \yysbyte is set to \strm[<marker>]{<contents of \yybyte>}.[<marker>]
\def\stashnextwithspace#1{%
    \yybytepure{ }\yycp@\stashchar
    \yybyte\expandafter{\the\yybyte{#1}}%
    \advance\stashmarker\@ne
    \edef\next{\yysbyte{\noexpand\strm[\the\stashmarker]{\the\yybyte}.[\the\stashmarker]}}\next
    % the extra `.' is to avoid the stripping of braces by the parameter scanning mechanism of TeX
    \yyreturn
}

% the mechanism for stash processing making stash invisible

% #1 is the stash contents; \stashmarker is advanced, {#1} is appended to
% \yybyte, \yybyte is passed to \concat together with \yysubtext, and a
% \strm[<marker>]{...}.[<marker>] element is passed to \appendr together with
% \yysbyte (\concat and \appendr are defined elsewhere); the macro ends with
% \yyinput instead of \yyreturn, i.e. the lexer is not notified and the input
% routine moves on to the next token
\def\stashnextwithnothing#1{%
    \advance\stashmarker\@ne
    \yybyte\expandafter{\the\yybyte{#1}}\concat\yysubtext\yybyte
    \appendr\yysbyte{\noexpand\strm[\the\stashmarker]{\the\yybyte}.[\the\stashmarker]}%
    \ifyyinputdebug
        \immediate\write16{stash byte: \the\yysbyte mid text: \the\yysubtext}%
    \fi
    % the extra `.' is to avoid the stripping of braces by the parameter scanning mechanism of TeX
    \yyinput
}

% while collecting the stash, the macro below packages the stash contents into a command sequence
% to add to the \yystash stream; the stash is presented to the lexer unpackaged though; this
% introduces a minor inefficiency while ensuring that exactly one level of packaging is present
% (otherwise the lexer may back up and repackage already packaged stash); the inefficiency only 
% persists while the lexer scans the current token (and possibly backs up).

% #1 is the stash contents; it is pushed on \astarray by \yypush and the
% element passed to \appendr (together with \yysbyte) refers to it through
% \gettopofstackcsx (both defined elsewhere), while \yybyte and \yysubtext
% receive {#1} itself; like \stashnextwithnothing, the macro ends with
% \yyinput so the input routine moves on to the next token
\def\stashnextwithnothingnx#1{%
    \advance\stashmarker\@ne
    \yypush{#1}\on\astarray
    \appendr\yysbyte{\noexpand\strm[\the\stashmarker]{\the\yybyte{%
        \expandafter\noexpand\romannumeral\gettopofstackcsx\astarray}}.[\the\stashmarker]}%
    \yybyte\expandafter{\the\yybyte{#1}}% do not package the input
    \concat\yysubtext\yybyte
    \ifyyinputdebug
        \immediate\write16{stash byte: \the\yysbyte mid text: \the\yysubtext}%
    \fi
    % the extra `.' is to avoid the stripping of braces by the parameter scanning mechanism of TeX
    \yyinput
}

% the stash processing macro used by the \stashed entry of \multicharswitch
\let\stashnext\stashnextwithnothingnx

% action for \format and \formatlocal: #1 is the argument of the format
% command; the command is reported to the lexer as a space (\yybytepure is a
% space, \yycp@ is \formatchar); \formatmarker is advanced and \yyfbyte is set
% to \strm[<marker>]{<command>{#1}}.[<marker>] where <command> is the contents
% of \yybyte at that point; {#1} is appended to \yybyte only afterwards
\def\formatnext#1{%
    \yybytepure{ }\yycp@\formatchar
    \toksa{#1}%
    \advance\formatmarker\@ne
    \edef\next{\yyfbyte{\noexpand\strm[\the\formatmarker]{\the\yybyte{\the\toksa}}.[\the\formatmarker]}}\next
    % the extra `.' is to avoid the stripping of braces by the parameter scanning mechanism of TeX
    \yybyte\expandafter{\the\yybyte{#1}}%
    \yyreturn
}

% actions for \formatbegin and \formatp: both collect their arguments and
% pass them to \formatnext as a single argument; \fmtbegin takes everything
% up to \fmtend, \fmtparam takes a token #1 and its argument #2
\def\fmtbegin#1\fmtend{\formatnext{#1}} % multiparameter format sequences
\def\fmtparam#1#2{\formatnext{#1{#2}}} % single parameter format sequences

% the character code (that of the space character) reported for an input boundary
\chardef\boundarychar=`\ %

% the following is a minimal setup of a parsing boundary

% action for \inputboundary: #1 is its argument; the boundary is reported as
% a space (\yybytepure is a space, \yycp@ is \boundarychar) and {#1} is
% appended to \yybyte
\def\inputboundarynext#1{% `l' for left boundary
    \yybytepure{ }\yycp@\boundarychar
    \yybyte\expandafter{\the\yybyte{#1}}%
    \yyreturn % inserting something here will effectively insert it into the input stream
}

% action for \sflush: the command is reported as a space (\yybytepure is a
% space, \yycp@ is \stashchar); \toksc first receives the contents of \yystash
% followed by those of \yystashseen and is then replaced by \yyfifolastidx
% with the index of the last \strm element found there; an element with the
% marker #1 that contains a description of the flush is passed to \appendr
% together with \yysbyte; {#1}{#2} is appended to \yybyte, which is then
% passed to \concat together with \yysubtext; clobbers \toksa, \toksb, \toksc
% and \next
\def\sflushnext#1#2{% #1 is the marker
                    % #2 is the contents
    \yybytepure{ }\yycp@\stashchar
    \edef\next{\toksc{\the\yystash\the\yystashseen}}\next
    \yyfifolastidx\toksc\in\toksc
    \toksa{#1}\toksb{#2}%
    \appendr\yysbyte{\noexpand\strm[\the\toksa]{\the\yybyte{{\nx\it flushing}: ``\the\toksb'', 
        {\nx\it last index}: $[\the\toksc]$. }}.[\the\toksa]}%
    % the extra `.' is to avoid the stripping of braces by the parameter scanning mechanism of TeX
    \yybyte\expandafter{\the\yybyte{#1}{#2}}%
    \concat\yysubtext\yybyte
    \ifyyinputdebug
        \immediate\write16{stash byte: \the\yysbyte mid text: \the\yysubtext}%
    \fi
    \yyreturn
}

% the following implementation is marginally cleaner as it does not redefine \yyr@@dfifo
% it also makes it almost transparent that \yyreadfifo does not modify any token registers
% except for the two of its parameters
% TODO: replace \yyr@@dfifo with \yyr@adfifo

% \yyreadfifo<fifo>\to<marker>\in<result>
%   #1 token register holding a sequence of \strm[<m>]...[<m>] elements
%   #2 the marker to read up to
%   #3 token register that receives the result
% the contents of #1 are scanned with the sentinel \strm[#2].[#2] appended;
% if the first match of \strm[#2]...[#2] is the sentinel itself (nothing is
% left before \end), #3 is emptied and #1 is left unchanged; otherwise #3
% receives everything up to and including the element marked #2 and #1
% receives the rest of the fifo with the sentinel removed;
% \yystringempty is defined elsewhere; it is used here as
% \yystringempty{<string>}{<if empty>}{<otherwise>};
% redefines \yyr@adfifo and \yyr@@dfifo
\def\yyreadfifo#1\to#2\in#3{%
%{% to reduce the side effects to the redefinition of the input and output
  % token registers 
    \def\yyr@adfifo##1\strm[#2]##2[#2]##3\end{%
        \yystringempty{##3}{#3{}}% there is no such marker in the fifo
        {%
            #3{##1\strm[#2]##2[#2]}%
            \def\yyr@@dfifo####1\strm[#2]####2[#2]\end{#1{####1}}% strip off the inserted string
            \yyr@@dfifo##3\end
% TODO:            \def\yyr@dfifo####1\strm[#2].[#2]\end{#1{####1}}% strip off the inserted string
%                  \yyr@dfifo##3\end
        }%
    }%
    \expandafter\yyr@adfifo\the#1\strm[#2].[#2]\end
%\edef\next{#1{\the#1}#3{\the#3}}\expandafter}\next
}

% \yytrimfifo<fifo>\to<marker>\in<result>
% same scan as in \yyreadfifo but the fifo #1 is not modified: #3 receives
% the initial segment of #1 up to and including the element marked #2, or is
% emptied if no element with this marker is found; redefines \yyr@adfifo
\def\yytrimfifo#1\to#2\in#3{% just save the initial segment of #1 in #3
    \def\yyr@adfifo##1\strm[#2]##2[#2]##3\end{%
        \yystringempty{##3}{#3{}}% there is no such marker in the fifo
        {%
            #3{##1\strm[#2]##2[#2]}%
        }%
    }%
    \expandafter\yyr@adfifo\the#1\strm[#2].[#2]\end
}

% variant of \yyreadfifo in which the marker #2 is converted to its decimal
% representation by \number before \yyreadfifo is called, so that #2 can be
% any quantity accepted by \number (a count register, for example)
\def\yyreadfifox#1\to#2\in#3{% read the fifo to a marker in #2
    \expandafter\yyreadfifo\expandafter#1\expandafter\to\number#2\in#3% 
}

% variant of \yyreadfifo in which the first token of #2 is expanded once
% before \yyreadfifo is called, so that #2 can be a macro that expands to the
% marker
\def\yyreadfifoxx#1\to#2\in#3{% read the fifo to a marker in #2
    \expandafter\yyreadfifo\expandafter#1\expandafter\to#2\in#3% 
}

% \yyreadfifot<fifo>\to<marker>\in<result>\with<test>
%   #1 token register holding a sequence of \strm[<m>]...[<m>] elements
%   #2 the marker to read up to
%   #3 token register that receives the elements selected by the test
%   #4 a macro with the parameter text [##1]##2.[##3] (see \formatsort for an
%      example) that is substituted for \strm while the initial segment of the
%      fifo is executed; it looks at its parameters and fills up \othertoks
%      (attached at the beginning of #1 after the initial segment has been
%      removed) and \fifotoks (the result, i.e. #3)
% if there is no element marked #2 in the fifo, #3 is emptied and #1 is left
% unchanged; redefines \yyr@adfifo, \yyr@@dfifo, \fifotoks, \othertoks and
% \@@strm
% every line inside the inner definition ends with a comment character so
% that no space tokens end up in the generated macro
\def\yyreadfifot#1\to#2\in#3\with#4{% read fifo with a test:
    \def\yyr@adfifo##1\strm[#2]##2[#2]##3\end{%
        \def\yyr@@dfifo{##3}%
        \ifx\yyr@@dfifo\empty
            #3{}% there is no such marker in the fifo
        \else
            \let\fifotoks#3%
            \let\othertoks#1%
            \let\@@strm\strm
            \let\strm#4%
            \fifotoks{}%
            \othertoks{}%
            ##1\strm[#2]##2[#2]% run all the tests
            \let\strm\@@strm
            \def\yyr@@dfifo####1\strm[#2]####2[#2]\end{#1\expandafter{\the\othertoks####1}}%
            \yyr@@dfifo##3\end
        \fi
    }%
    \expandafter\yyr@adfifo\the#1\strm[#2].[#2]\end
}

% variant of \yyreadfifot in which the marker #2 is converted to its decimal
% representation by \number before \yyreadfifot is called
\def\yyreadfifotx#1\to#2\in#3\with#4{% read the fifo to a marker in #2
    \expandafter\yyreadfifot\expandafter#1\expandafter\to\number#2\in#3\with{#4}% 
}

% an example of a sorting macro: extract all the local formatting

% a test macro suitable as the \with argument of \yyreadfifot: it takes the
% place of \strm so #1 and #3 are the markers and #2 is the body of a fifo
% element; \toksb receives the first token (or group) of #2, \toksc the
% reassembled element, and \formatswitch decides, based on \toksb, where the
% element goes (\formatdefaultact is the fallback)
\def\formatsort[#1]#2.[#3]{% clobbers \toksb, \toksc
   \formatgrab#2\end
   \toksc{\strm[#1]{#2}.[#3]}%
   \let\default\formatdefaultact
   \switchon{\the\toksb}\in\formatswitch
}

% store the first token (or group) of the text delimited by \end in \toksb
% and discard the rest
\def\formatgrab#1#2\end{\toksb{#1}}

% switch used by \formatsort: elements that start with \formatlocal are
% appended (as stored in \toksc) to \fifotoks
\def\formatswitch{%
    \formatlocal {%
        \edef\next{\fifotoks{\the\fifotoks\the\toksc}}\next
    }
}

% default action of \formatsort: the element stored in \toksc is appended to
% \othertoks
\def\formatdefaultact{%
    \edef\next{\othertoks{\the\othertoks\the\toksc}}\next
}

% \yygrabfifoelem<fifo>\at<marker>\in<result>
% remove the first element marked #2 from the fifo in the token register #1:
% #3 receives the element (\strm[#2]...[#2]) and #1 receives the rest of the
% fifo (the part before the element followed by the part after it); if there
% is no such element, #3 is emptied and #1 is left unchanged;
% redefines \yyr@adfifo and \yyr@@dfifo
\def\yygrabfifoelem#1\at#2\in#3{%
    \def\yyr@adfifo##1\strm[#2]##2[#2]##3\end{%
        \def\yyr@@dfifo{##3}% 
        \ifx\yyr@@dfifo\empty
            #3{}% there is no such marker in the fifo
        \else
            #3{\strm[#2]##2[#2]}%
            \def\yyr@@dfifo####1\strm[#2]####2[#2]\end{#1{####1}}% strip off the inserted string
            \yyr@@dfifo##1##3\end
        \fi
    }%
    \expandafter\yyr@adfifo\the#1\strm[#2].[#2]\end
}

% \yygrabfifoelemx<fifo>\at<marker>\in<result>\with<ignored>
% variant of \yygrabfifoelem in which the marker #2 is converted to its
% decimal representation by \number before \yygrabfifoelem is called;
% \yygrabfifoelem has no \with parameter, so #4 is accepted only to keep the
% calling syntax and is discarded: passing `\with{#4}' on would leave these
% tokens in the input to be executed after \yygrabfifoelem has finished
\def\yygrabfifoelemx#1\at#2\in#3\with#4{% grab the fifo element at the marker in #2
    \expandafter\yygrabfifoelem\expandafter#1\expandafter\at\number#2\in#3%
}

% \yypeekfifoelem<fifo>\at<marker>\in<result>
% look up the first element marked #2 in the token register #1 without
% modifying #1: #3 receives the tokens found between \strm[#2] and the
% closing [#2] of that element, or is emptied if there is no such element;
% redefines \yyr@adfifo and \yyr@@dfifo
\def\yypeekfifoelem#1\at#2\in#3{% just read the first element that matches the marker
    \def\yyr@adfifo##1\strm[#2]##2[#2]##3\end{%
        \def\yyr@@dfifo{##3}% 
        \ifx\yyr@@dfifo\empty
            #3{}% there is no such marker in the fifo
        \else
            #3{##2}% just get the contents
        \fi
    }%
    \expandafter\yyr@adfifo\the#1\strm[#2].[#2]\end
}

% \yypeekfifoelemx<fifo>\at<marker>\in<result>\with<ignored>
% variant of \yypeekfifoelem in which the marker #2 is converted to its
% decimal representation by \number before \yypeekfifoelem is called;
% \yypeekfifoelem has no \with parameter, so #4 is accepted only to keep the
% calling syntax and is discarded: passing `\with{#4}' on would leave these
% tokens in the input to be executed after \yypeekfifoelem has finished
\def\yypeekfifoelemx#1\at#2\in#3\with#4{% peek at the fifo element at the marker in #2
    \expandafter\yypeekfifoelem\expandafter#1\expandafter\at\number#2\in#3%
}

% a replacement for \strm that stores the opening marker #1 of a fifo element
% in \toksa and discards the rest of the element
\def\strmgetfirstidx[#1]#2.[#3]{\toksa{#1}}

% \yyfifolastidx<fifo>\in<result>
% store the marker of the last element of the fifo in the token register #1
% in the token register #2: the contents of #1 are executed with \strm
% temporarily equal to \strmgetfirstidx so each element overwrites \toksa
% with its marker and the marker of the last element is the one that remains;
% #2 is emptied if the fifo contains no elements; clobbers \toksa and \next
\def\yyfifolastidx#1\in#2{%
    \let\next\strm
    \let\strm\strmgetfirstidx
    \toksa{}\the#1%
    #2\toksa
    \let\strm\next
}

% \ischar<string>\end sets \ifchar to true if everything that follows the
% first two tokens of <string> is the single character `0' (compared with
% \z@rotest by \ifx) and to false otherwise; \yy@np@t calls it with the
% output of \string padded by one or two `0' characters to test the length of
% a token's name; redefines \lastnamechar
\def\z@rotest{0}
\newif\ifchar

\def\ischar#1#2#3\end{% three parameters because #1 can be an
                      % \escapechar
    \def\lastnamechar{#3}%
    \ifx\lastnamechar\z@rotest\chartrue\else\charfalse\fi
}

% trivial input routine

% entry point of the trivial input routine: \futurelet makes \next equal to
% the upcoming input token (without removing it from the input) and then
% runs \yyinp@ttrivial
\def\yyinputtrivial{\futurelet\next\yyinp@ttrivial} % get the code of the next character ...

% \yyinp@ttrivial examines \next (set by \yyinputtrivial): a token of the same
% category as \space is returned as a space (\yycp@=32, \yybyte={ },
% \mkpurebyte) with \yyskipspace removing it from the input; any other token
% is passed to \yy@np@ttrivial;
% the opening brace of the definition is followed by a comment character so
% that the replacement text does not start with a space token (which would be
% typeset whenever the macro is run in horizontal mode);
% \yybreak and \yycontinue are defined elsewhere
\def\yyinp@ttrivial{%
    \ifcat\noexpand\next\space % category 10 token
        \yycp@=`\ % 
        \yybyte={ }%
        \mkpurebyte
        \yybreak\yyskipspace % return the space
    \else
        \yybreak\yy@np@ttrivial
    \yycontinue
}

% \yy@np@ttrivial absorbs the next token as #1 and returns it:
%   a token of the same category as the letter `a' -> \yycp@ is its character
%       code, \yybyte and \yybytepure both hold the token itself;
%   a token with the character code of \eolletter -> \yycp@=\n, \yybyte holds
%       the first level expansion of \eolletter, \yybytepure is set by \mkpurebyte;
%   \yyeof -> \yycp@=\YYENDOFBUFFERCHAR, \yybyte holds \yyeof, \yybytepure is empty;
%   anything else -> \yycp@ is the pseudo code \charseq, \yybytepure is set by
%       \mkpurebyte.
% NOTE(review): \yybyte is not assigned in the last case and so keeps the
% value it had before the call -- confirm that this is intended
\def\yy@np@ttrivial#1{%
    \ifcat\noexpand#1a%
        \yycp@`#1%
        \yybyte{#1}%
        \yybytepure{#1}%
    \else
        \if\noexpand#1\eolletter
            \yycp@=\n
            \yybyte\expandafter{\eolletter}%
            \mkpurebyte
        \else
            \ifx#1\yyeof
                \yycp@=\YYENDOFBUFFERCHAR\relax
                \yybyte{#1}%
                \yybytepure={}%
            \else
                \yycp@\charseq
                \mkpurebyte
            \fi
        \fi
    \fi
    \yyreturn
}
