% Copyright (c) 2005 Jonathan Fine <jfine@pytex.org>
% License: GPL version 2 or (at your option) any later version.
% $Source: /cvsroot/pytex/pytex/tex2tok/_tex2tok.tex,v $

%  usage: '\LEX{story}'   (story is the name of the document file to be read)

% Category codes used while reading the rest of this file.
% Meaning of the values assigned below:
%   0 escape character    1 begin-group       2 end-group
%   3 math shift          4 alignment tab     6 macro parameter
%   7 superscript         8 subscript        11 letter
% `|' becomes an additional escape character (the backslash is still used
% as one throughout this file), so |name and \name are the same control
% sequence.  `@' becomes a letter so that it can appear in the names of
% the internal macros.  \LEX sets both `|' and `@' back to 12 before the
% document file is read.
\catcode`|=0                    % for document control sequences
\catcode`{=1
\catcode`}=2
\catcode`$=3
\catcode`&=4
\catcode`#=6
\catcode`^=7
\catcode`_=8
\catcode`@=11

% Shorthand tokens used by the macros in this file:
%   \bgroup, \egroup - implicit begin-group and end-group characters
%   \zero, \one      - the numbers 0 and 1, as \chardef constants
\let\bgroup={
\let\egroup=}
\chardef\zero=0
\chardef\one=1

% Make \space an implicit space token (it is compared against \value with
% \ifx in \loop@char@A).  A plain `\let\space= ' followed by a space would
% not work, because \let swallows one optional space after the `='.  The
% helper \? therefore ends with a space that fills that optional-space
% slot; the token then assigned is the space that follows the call `\?'.
% That space survives because \? is a control symbol, after which spaces
% are not skipped.
\def\?{\let\space = } \?        % define \space
\let\? \undefined               % clean up afterwards

% Stream number used by the \write commands of the tokenizer.
% Note that \LEX locally resets \tokfile to 15 before it opens the output
% file, so the value given here is not the one in force while \LEX runs.
\chardef\tokfile 16             % change if you wish


% \loop - entry point of the token-by-token scan.
% Looks at the next input token without removing it: \futurelet makes
% \value equal to that token's meaning and then passes control to
% \loop@A, which decides how the token is handled.
\def\loop{\futurelet\value\loop@A}


% \loop@continue - drop the next input token, then re-enter \loop.
% The token is removed by assigning it to \@temp with \let instead of
% reading it as a macro argument; this is what is needed for space and
% brace characters.  \afterassignment schedules \loop to run as soon as
% that assignment has been made.
% The space after `=' is significant: it fills the optional-space slot
% of \let, so the token that gets assigned is always the next input
% token, even when that token is itself a space.
\def\loop@continue{\afterassignment\loop \let\@temp = }


% \loop@A - decide whether the token whose meaning is held in \value is
% expandable.
% \noexpand\value is expanded first (by the \expandafter), and \ifx then
% compares the result with \value itself.  The two agree only when \value
% is unexpandable; for a macro or an undefined control sequence they
% differ.
% Working assumption of this file: unexpandable tokens are characters,
% and control sequences met in the document are expandable or undefined
% (a character may occasionally be expandable too).
% The \expandafter in each branch closes the conditional before the
% chosen macro starts to work on the input.
\def\loop@A{%
  \expandafter\ifx\noexpand\value\value
    \expandafter\loop@char      % unexpandable: handle as a character
  \else
    \expandafter\loop@macro     % expandable or undefined
  \fi
}


% \loop@macro - pass the next (expandable or undefined) token on to
% \loop@macro@A.
% The \expandafter makes TeX expand \noexpand, applied to that token,
% before \loop@macro@A absorbs the token as its argument.  According to
% the original author's note, this suppresses the implicit \outer token
% at the end of a file.
\def\loop@macro{%
  \expandafter\loop@macro@A
  \noexpand
}

\long\def\loop@macro@A #1%
% #1 - expandable or undefined token, picked up from the input stream
% if #1 is undefined: hand it to \process@undefined, then resume the loop
% otherwise: let #1 execute; the loop is not restarted from here, so it
%   carries on only if the expansion of #1 leads back to \loop
%   (\endloop, which expands to nothing, is how the loop is ended)
% the \expandafter in each branch removes the remainder of the
%   conditional before \loop or #1 starts to read the input
% the comment character after the opening brace keeps the end of that
%   line from becoming a space token at the start of the replacement text
{%
  \ifx #1\undefined
    \process@undefined #1%
    \expandafter\loop
  \else
    \expandafter #1%
  \fi
}


% \process@undefined - hook called by \loop@macro@A for an undefined
% control sequence.
% #1 - the undefined control sequence
% The default action writes the name of #1 (via \string) as one line on
% stream \tokfile.  This is only a default; the macro may be redefined.
\long\def\process@undefined#1{%
  \immediate\write\tokfile{\string#1}%
}


% \endloop - terminator for \loop.
% It is a defined macro with empty replacement text: when \loop meets it,
% \loop@macro@A lets it execute and does not restart the loop, so the
% scan stops here.
\def\endloop{}


\def\loop@char
% process next token, which is unexpandable (normally a character)
% \value already holds its meaning; the token itself is still unread
% letters and `other' characters are passed to \loop@char@default
% anything else (space, brace or whatever) is passed to \loop@char@A
% afterwards, those macros continue with the loop
{%
  % \ifcase needs a number, and TeX expands tokens while it looks for
  % one, so the \ifcat tests below are evaluated first.  A test that
  % succeeds supplies \one (1) as the number; if neither succeeds, \zero
  % (0) is reached and used instead.  Whatever remains before \or then
  % belongs to case 0, which is skipped when the number is 1.
  \ifcase 
    % first deal with the common cases, at high speed
    \ifcat a\value \one\fi      % it's a letter
    \ifcat ?\value \one\fi      % it's punctuation, digit, etc.
    \zero
    \expandafter\loop@char@A    % it's something else
  \or                           % case of letter or digit
    \expandafter\loop@char@default
  \fi
}


% \loop@char@default - handle a token that can be read as an ordinary
% undelimited macro argument.
% #1 - the token, taken from the input stream
% Writes #1 as one line on stream \tokfile, then carries on with \loop.
\def\loop@char@default#1{\immediate\write\tokfile{#1}\loop}


\def\loop@char@A
% NOTE(review): this definition is superseded.  \loop@char@A is defined
% again a few lines further down, and nothing between the two
% definitions uses the macro, so this version never runs.
% It could not run correctly as written: the body opens two \ifx
% conditionals but closes only one with \fi, the first \ifx\value is not
% followed by a proper pair of tokens to compare, and \@space, \@percent
% and \@char@strip are not defined in the visible source.
% Apparent intent: next token is a special character; write a
% representation of it to stream 16 (a space, or text derived from
% \meaning\value), followed by \@percent.
% Candidate for removal -- confirm that no other file relies on it.
{%
  \ifx\value
  \ifx\value\@space
    \immediate\write16{ \@percent}
  \else
    \immediate\write16{%
      \expandafter\@char@strip\meaning\value
      \@percent
    }%
  \fi
}


% \empty - macro with empty replacement text; \loop@char@A compares \temp
% against it with \ifx to find out whether \temp is empty
\def\empty{}
\def\loop@char@A 
% next token is unexpandable, but neither a letter nor an `other' character
% \value holds its meaning; the token itself is still in the input
% \temp is set to the text to be written for the three special cases:
%   space token        - a single space (the space after \if11 is the
%                        only token that the always-true test lets through)
%   begin-group token  - `+' followed by a left brace character
%   end-group token    - `-' followed by a right brace character
% in those cases \temp is written to \tokfile and \loop@continue drops
%   the token, which could not be read as a macro argument
% in all other cases \temp is empty and \loop@char@default takes over
% the comment character after the brace that closes the \edef keeps the
%   end of that line from becoming a space token in the replacement text
{%
  \edef\temp
  {%
    \ifx\value\space
      \if11 \fi
    \else\ifx\value\bgroup
      +\string{%
    \else\ifx\value\egroup
      -\string}%
    \fi\fi\fi
  }%
  \ifx\temp\empty
    \expandafter\loop@char@default
  \else
    \immediate\write\tokfile{\temp}%
    \expandafter\loop@continue
  \fi
}

\def |LEX #1%
% |LEX is the same control sequence as \LEX, since `|' has category
%   code 0 while this file is read
% #1 - name of the document file to be tokenized
% opens \jobname.tok on stream 15, runs \loop over the contents of
%   file #1, closes the stream and ends the TeX run
% the changes made here are local to the group opened below
{%
  \begingroup
    % with \par undefined, the \par token produced by a blank line is
    % reported through \process@undefined like any other undefined
    % control sequence
    \let|par\undefined          % blank lines in input document file
    \let\LEX\undefined          % in case it appears in document file
    % the new category codes only affect text tokenized from now on,
    % that is, the document file
    \catcode`!=12               % restore to normal value
    \catcode`|=12               % restore to normal value
    \catcode`@=12               % restore to normal value
    \chardef\tokfile 15
    % expansion order: the first \expandafter triggers the second one,
    % which expands \input before anything else; only after that is
    % \next expanded and the \openout carried out, and then \loop starts
    % on the first token of the document file.
    % NOTE(review): presumably \input has to come first so that \jobname
    % is already the name of the document file when the name of the
    % output file is built -- confirm.
    \def\next{\immediate\openout\tokfile \jobname.tok }%
    \expandafter\next
    \expandafter\loop
      \input #1                 % space to terminate file name
    \endloop
    \immediate\closeout\tokfile
  \endgroup
  \end
}

% write the definitions made above to a format file and finish
\dump
