/* -*- Mode:Text -*- */
#ifndef lint
static char Rcs_Id[] =
    "$Id: defmt.c,v 1.15 1992/01/07 10:04:51 geoff Exp $";
#endif

/*
 * defmt.c - Handle formatter constructs, mostly by scanning over them.
 *
 * This code originally resided in ispell.c, but was moved here to keep
 * file sizes smaller.
 *
 * Copyright (c), 1983, by Pace Willisson
 *
 * Copyright 1987, 1988, 1989, by Geoff Kuenning, Manhattan Beach, CA
 * Permission for non-profit use is hereby granted.
 * All other rights reserved.
 * See "version.h" for a more complete copyright notice.
 *
 * The TeX code is originally by Greg Schaffer, with many improvements from
 * Ken Stevens.  The nroff code is primarily from Pace Willisson, although
 * other people have improved it.
 */

/*
 * $Log: defmt.c,v $
 * Revision 1.15  1992/01/07  10:04:51  geoff
 * Be a bit smarter about only putting out linefeeds if they were there
 * in the input file; this is necessary to correctly handle very long
 * input lines.
 *
 * Revision 1.14  1992/01/04  22:08:11  geoff
 * Correctly handle TeX display-math mode (doubled dollar signs).  Add
 * some comments to the code that handles tib(1) bibliography entries.
 * Fix the tib(1) handling so that it doesn't break if the (apparent)
 * reference crosses a line boundary.
 *
 * Revision 1.13  1991/12/09  00:40:35  geoff
 * Get rid of a never-reached return statement.
 *
 * Revision 1.12  91/09/12  00:01:30  geoff
 * Many changes to improve TeX deformatting: Recognize comments, so that
 * things like dollar signs in comments don't end math mode.  Recognize
 * the "tib" bibliography citation syntax.  Fix a number of places where
 * the parsing got a bit confused.
 * 
 * Revision 1.11  91/07/15  19:26:47  geoff
 * Provide the "canonical" parameter to all *isstringch, strtoichar, and
 * strtosichar calls.
 * 
 * Revision 1.10  91/07/11  19:51:59  geoff
 * Remove the include of stdio.h, since ispell.h now does this.
 * 
 * Revision 1.9  91/07/03  18:20:27  geoff
 * Replace all conversions of chars to ichar_t's with a macro call which
 * (a) does the conversion correctly and (b) makes future changes easy in
 * case (a) is false.
 * 
 * Revision 1.8  91/06/23  22:08:54  geoff
 * When casting to ichar_t, cast to unsigned first to avoid sign-extension
 * problems.
 * 
 * Revision 1.7  90/12/31  00:59:04  geoff
 * Reformat to follow a consistent convention throughout ispell
 * 
 * Revision 1.6  90/10/05  01:54:04  geoff
 * Kenny Stevens' latest fixes:  minword didn't work in ask mode;  LaTeX
 * could put you in math mode unexpectedly, and a couple of other small
 * (he says) bugs.
 * 
 * Revision 1.5  90/04/17  15:31:14  geoff
 * Fix the TeX parsing routines to accept a buffer pointer to be updated,
 * rather than using the global "currentchar."  This fixes a bug in TeX
 * parsing where the routines looked at the wrong character in the line.
 * 
 * Revision 1.4  89/12/27  22:25:40  geoff
 * Add minword support.
 * 
 * Revision 1.3  89/12/27  03:17:50  geoff
 * Move all messages to msgs.h so they can be reconfigured
 * 
 * Revision 1.2  89/10/20  00:10:25  geoff
 * Remove version.h
 * 
 * Revision 1.1  89/06/27  01:56:02  geoff
 * Break ispell.c up into smaller files
 * 
 *
 * The following log lines are from when this code was in ispell.c:
 *
 * Revision 1.63  89/02/17  14:10:31  geoff
 * Improve the DeTex processing some more (Ken Stevens).
 * 
 * Revision 1.52  88/06/25  17:48:26  geoff
 * Add support for the new switches -n, -b, -B, -C, -P, and -m, for the
 * keeping of TeX/nroff special characters in the hash header, and for
 * setting certain defaults in the hash header.  Note that the -B/-C switches
 * add the new routine "compoundgood", and the -P/-m switches replace
 * "nopossibilities" with "easypossibilities" and change the emacs output
 * format.
 * 
 * Revision 1.34  87/06/07  15:25:09  geoff
 * At Don Kark's suggestion, improve ISTEXTERM
 * 
 * Revision 1.33  87/06/07  15:15:24  geoff
 * Integrate Don Karks's TeX brace-handling
 * 
 * Revision 1.18  87/03/31  16:40:59  geoff
 * Integrate Steve Kelem's changes into the main branch:  really wait for
 * a space in givehelp();  accept +/- in troff size strings, and automatically
 * set TeX mode if the file extension is ".tex".
 * 
 * Revision 1.17  87/03/31  15:32:55  geoff
 * Improve troff backslash handling some more (isaac@mulga.oz)
 * 
 * Revision 1.11  87/03/23  00:03:48  geoff
 * Let's try again:  Greg Schaffer's fixed TeX stuff
 * 
 */

#include <ctype.h>
#include "config.h"
#include "ispell.h"
#include "msgs.h"

/*
 * ISTEXTERM(c) is true when "c" equals one of TEXLEFTCURLY,
 * TEXRIGHTCURLY, TEXLEFTSQUARE, or TEXRIGHTSQUARE.
 */
#define ISTEXTERM(c)   (((c) == TEXLEFTCURLY) || \
			((c) == TEXRIGHTCURLY) || \
			((c) == TEXLEFTSQUARE) || \
			((c) == TEXRIGHTSQUARE))
/*
 * ISMATHCH(c) is true when "c" equals TEXBACKSLASH, TEXDOLLAR, or
 * TEXPERCENT.  skiptoword() uses it while in math mode to find the
 * next character that is worth handing to TeX_math_end().
 */
#define ISMATHCH(c)    (((c) == TEXBACKSLASH) || \
			((c) == TEXDOLLAR) || \
			((c) == TEXPERCENT))

/*
 * Advance "p" by at most "n" characters, stopping early at the
 * terminating NUL.  This keeps a truncated nroff escape at the end of
 * a line from carrying the scan pointer past the end of the buffer.
 */
static char * defmt_nrskip (p, n)
    register char *	p;
    register int	n;
    {

    while (n-- > 0  &&  *p != '\0')
	p++;
    return p;
    }

/*
 * skiptoword - skip over everything that is not the start of a word.
 *
 * Starting at "bufp", step over non-word characters, boundary
 * characters, and formatter constructs (TeX/LaTeX when tflag is set,
 * nroff/troff backslash escapes otherwise).  While TeX math mode is
 * active (math_mode odd) and we are not inside a TeX comment,
 * everything is skipped, word characters included.
 *
 * Returns a pointer to the first character of the next word, or to
 * the terminating NUL if the line is exhausted.  In the latter case
 * TeX_comment is cleared, because a TeX comment ends with the line.
 *
 * Side effects: updates the globals math_mode, LaTeX_Mode and
 * TeX_comment through the TeX helper routines.
 */
char * skiptoword (bufp)		/* Skip to beginning of a word */
    char *	bufp;
    {

    while (*bufp
      &&  !isstringch(bufp, 0)
      &&  (!iswordch(chartoichar (*bufp))
	||  isboundarych(chartoichar (*bufp))
	||  (tflag  &&  (math_mode & 1)  &&  !TeX_comment))
      )
	{
	/* check paren necessity... */
	if (tflag) /* TeX or LaTeX stuff */
	    {
	    /* Odd numbers mean we are in "math mode" */
	    /* Even numbers mean we are in LR or */
	    /* paragraph mode */
	    if (TeX_comment)
		;			/* Don't check comments */
	    else if (*bufp == TEXPERCENT)
		TeX_comment = 1;
	    else if (math_mode & 1)
		{
		/*
		** LaTeX_Mode 'e' / 'm' mean that an \end{...} or an
		** LR-box command was cut off by the end of the
		** previous line and must be completed here.
		*/
		if ((LaTeX_Mode == 'e'  &&  TeX_math_check('e', &bufp))
		  || (LaTeX_Mode == 'm'  &&  TeX_LR_check(1, &bufp)))
		    math_mode--;    /* end math mode */
		else
		    {
		    /* Jump to the next character that could end math mode */
		    while (*bufp  && !ISMATHCH(*bufp))
			bufp++;
		    if (*bufp == 0)
			break;
		    if (TeX_math_end(&bufp))
			math_mode--;
		    }
		if (math_mode < 0)
		    {
		    (void) fprintf (stderr,
		     DEFMT_C_TEX_MATH_ERROR);
		    math_mode = 0;
		    }
		}
	    else
		{
		if (math_mode > 1
		  &&  *bufp == TEXRIGHTCURLY
		  &&  (math_mode < (math_mode & 127) * 128))
		    math_mode--;    /* re-enter math */
		else if (LaTeX_Mode == 'm'
		    || (math_mode && (math_mode >= (math_mode & 127) * 128)
		  &&  (strncmp(bufp, "\\end", 4)
		    == 0)))
		    {
		    if (TeX_LR_check(0, &bufp))
			math_mode--;
		    }
		else if (LaTeX_Mode == 'b'  &&  TeX_math_check('b', &bufp))
		    {
		    /* continued begin */
		    math_mode++;
		    }
		else if (LaTeX_Mode == 'r')
		    {
		    /* continued "reference" */
		    TeX_skip_parens(&bufp);
		    LaTeX_Mode = 'P';
		    }
		else if (TeX_math_begin(&bufp))
		    /* checks references and */
		    /* skips \ commands */
		    math_mode++;
		}
	    /* The helpers may have run to the end of the line */
	    if (*bufp == 0)
		break;
	    }
	else			/* formatting escape sequences */
	    {
	    if (*bufp == NRBACKSLASH)
		{
		/*
		** Every skip below is bounded by the NUL terminator:
		** an escape that is cut short by the end of the line
		** stops the scan there instead of running past it.
		*/
		switch ( bufp[1] )
		    {
		    case 'f':
			if(bufp[2] == NRLEFTPAREN)
			    {
			    /* font change: \f(XY */
			    bufp = defmt_nrskip (bufp, 5);
			    }
			else
			    {
			    /* ) */
			    /* font change: \fX */
			    bufp = defmt_nrskip (bufp, 3);
			    }
			continue;
		    case 's':
			/* size change; bufp[0] and bufp[1] are non-NUL */
			bufp += 2;
			if (*bufp == '+'  ||  *bufp == '-')
			    bufp++;
			/* This looks weird 'cause we
			** assume *bufp is now a digit.
			*/
			if (*bufp != '\0')
			    bufp++;
			/* cast: isdigit is undefined for negative chars */
			if (isdigit ((unsigned char) *bufp))
			    bufp++;
			continue;
		    default:
			if (bufp[1] == NRLEFTPAREN)
			    {
			    /* extended char set */
			    /* escape:  \(XX */
			    /* ) */
			    bufp = defmt_nrskip (bufp, 4);
			    continue;
			    }
			else if (bufp[1] == NRSTAR)
			    {
			    /* string escape: \*(XX or \*X */
			    if (bufp[2] == NRLEFTPAREN)
				bufp = defmt_nrskip (bufp, 5);
			    else
				bufp = defmt_nrskip (bufp, 3);
			    continue;
			    }
			break;
		    }
		}
	    }
	bufp++;
	}
    if (*bufp == '\0')
	TeX_comment = 0;
    return bufp;
    }

/*
 * skipoverword - find the end of the word that starts at "bufp".
 *
 * "bufp" MUST point at a real word start.  The scan accepts string
 * characters, word characters and boundary characters.  Boundary
 * characters only belong to the word when more word text follows
 * them, so a trailing run of boundary characters is excluded: the
 * returned pointer is then the first character of that run.
 * Otherwise the returned pointer is the first character that is not
 * part of the word.
 *
 * If the scan reaches the end of the line, TeX_comment is cleared.
 */
char * skipoverword (bufp)	/* Return pointer to end of a word */
    register char *	bufp;	/* Start of word -- MUST BE A REAL START */
    {
    register char *	trailing = NULL; /* Start of trailing boundary run */
    register int	slen;		/* Length of a string character */

    for (  ;  *bufp != '\0';  )
	{
	if (l_isstringch(bufp, slen, 0))
	    {
	    trailing = NULL;
	    bufp += slen;
	    continue;
	    }
	/*
	** A character may satisfy isstringstart() without being in
	** the string table; it is then treated as an ordinary word
	** character, so string characters may start with word
	** characters.
	*/
	if (iswordch (chartoichar (*bufp)))
	    {
	    trailing = NULL;
	    bufp++;
	    continue;
	    }
	if (!isboundarych (chartoichar (*bufp)))
	    {
	    /* End of the word, somewhere before the end of the line */
	    if (trailing != NULL)
		return trailing;
	    return bufp;
	    }
	/* Remember only the first boundary character of a run */
	if (trailing == NULL)
	    trailing = bufp;
	bufp++;
	}

    /* Ran off the end of the line; any TeX comment ends here too */
    TeX_comment = 0;
    if (trailing != NULL)
	return trailing;
    return bufp;
    }

/*
 * checkline - spell-check the line held in contextbufs[0].
 *
 * A trailing newline, if present, is stripped from the buffer and
 * written back at the end (unless lflag is set) so that output lines
 * only get a linefeed when the input had one (or always in aflag
 * mode).  When tflag is clear, nroff requests at the start of the
 * line (".if t", ".if n", ".ds", ".de", ".nr", and any other request
 * starting with NRDOT) are passed over before word scanning starts.
 *
 * For every word found:
 *   lflag  - the word is written to "ofile" on its own line if it is
 *            longer than minword, is not good(), and cflag is clear.
 *   aflag  - one result line is written to "ofile":
 *              "*"                      word is OK (or <= minword)
 *              "+ root"                 OK through affix removal
 *              "-"                      OK as a compound word
 *              "& or ? word count offset: guesses"
 *              "# word offset"          no guesses at all
 *            ("*", "+" and "-" lines are suppressed when terse is set.)
 *   else   - correct() is called (unless quit is set) and the token
 *            is then copied to "ofile".
 *
 * No value is returned.
 */
checkline (ofile)
    FILE *		ofile;
    {
    register char *	p;
    register char *	endp;
    int			hadlf;
    register int	len;
    register int	i;
    int			ilen;

    currentchar = contextbufs[0];
    /*
    ** Guard against an empty buffer: indexing with strlen() - 1
    ** would read contextbufs[0][-1].
    */
    len = strlen (contextbufs[0]);
    hadlf = len > 0  &&  contextbufs[0][len - 1] == '\n';
    if (hadlf)
	contextbufs[0][len - 1] = 0;

    if (!tflag)
	{
	/* skip over .if */
	if (*currentchar == NRDOT
	  &&  (strncmp (currentchar + 1, "if t", 4) == 0
	    ||  strncmp (currentchar + 1, "if n", 4) == 0))
	    {
	    copyout (&currentchar,5);
	    while (*currentchar
	      &&  myspace (chartoichar (*currentchar)))
		copyout (&currentchar, 1);
	    }

	/* skip over .ds XX or .nr XX */
	if (*currentchar == NRDOT
	  &&  (strncmp (currentchar + 1, "ds ", 3) == 0 
	    ||  strncmp (currentchar + 1, "de ", 3) == 0
	    ||  strncmp (currentchar + 1, "nr ", 3) == 0))
	    {
	    copyout (&currentchar, 4);
	    /* white space, then the name being defined */
	    while (*currentchar
	      &&  myspace (chartoichar (*currentchar)))
		copyout(&currentchar, 1);
	    while (*currentchar
	      &&  !myspace (chartoichar (*currentchar)))
		copyout(&currentchar, 1);
	    if (*currentchar == 0)
		{
		if (!lflag  &&  (aflag  ||  hadlf))
		    (void) putc ('\n', ofile);
		return;
		}
	    }
	}


    /* if this is a formatter command, skip over it */
    if (!tflag && *currentchar == NRDOT)
	{
	while (*currentchar  &&  !myspace (chartoichar (*currentchar)))
	    {
	    if (!aflag && !lflag)
		(void) putc (*currentchar, ofile);
	    currentchar++;
	    }
	if (*currentchar == 0)
	    {
	    if (!lflag  &&  (aflag  ||  hadlf))
		(void) putc ('\n', ofile);
	    return;
	    }
	}

    while (1)
	{
	/* Pass over everything up to the next word */
	p = skiptoword (currentchar);
	if (p != currentchar)
	    copyout (&currentchar, p - currentchar);

	if (*currentchar == 0)
	    break;

	/* Collect the word into ctoken, truncating if it is too long */
	p = ctoken;
	endp = skipoverword (currentchar);
	while (currentchar < endp  &&  p < ctoken + sizeof ctoken - 1)
	    *p++ = *currentchar++;
	*p = 0;
	strtoichar (itoken, ctoken, 0);
	ilen = icharlen (itoken);

	if (lflag)
	    {
	    if (ilen > minword
	      &&  !good (itoken, 0, 0)  &&  !cflag)
		(void) fprintf (ofile, "%s\n", ctoken);
	    }
	else
	    {
	    if (aflag)
		{
		if (ilen <= minword)
		    {
		    /* matched because of minword */
		    if (!terse)
			(void) fprintf (ofile, "*\n");
		    continue;
		    }
		if (good (itoken, 0, 0))
		    {
		    if (hits[0].prefix == NULL
		      &&  hits[0].suffix == NULL)
			{
			/* perfect match */
			if (!terse)
			    (void) fprintf (ofile, "*\n");
			}
		    else if (!terse)
			{
			/* matched because of root */
			(void) fprintf (ofile, "+ %s\n",
			  hits[0].dictent->word);
			}
		    }
		else if (compoundgood (itoken))
		    {
		    /* compound-word match */
		    if (!terse)
			(void) fprintf (ofile, "-\n");
		    }
		else
		    {
		    makepossibilities (itoken);
		    if (pcount)
			{
			/*
			** print &  or ?, ctoken, then
			** possibility count, character
			** offset, and the possibilities.
			**
			** The offset expression has type size_t,
			** so it is cast to match the %d format.
			*/
			(void) fprintf (ofile, "%c %s %d %d",
			  easypossibilities ? '&' : '?',
			  ctoken,
			  easypossibilities,
			  (int) ((currentchar - contextbufs[0])
			    - strlen (ctoken)));
			for (i = 0;  i < MAXPOSSIBLE;  i++)
			    {
			    if (possibilities[i][0] == 0)
				break;
			    (void) fprintf (ofile, "%c %s",
			      i ? ',' : ':', possibilities[i]);
			    }
			(void) fprintf (ofile, "\n");
			}
		    else
			{
			/*
			** No possibilities found for word TOKEN
			*/
			(void) fprintf (ofile, "# %s %d\n",
			  ctoken,
			  (int) ((currentchar - contextbufs[0])
			    - strlen (ctoken)));
			}
		    }
		}
	    else
		{
		if (!quit)
		   correct (ctoken, itoken, &currentchar);
		}
	    }
	if (!aflag  &&  !lflag)
	   (void) fprintf (ofile, "%s", ctoken);
	}

    if (!lflag  &&  (aflag  ||  hadlf))
       (void) putc ('\n', ofile);
   }

/*
 * TeX_math_end - decide whether the construct at *bufp ends math mode.
 *
 * Called with *bufp on a dollar sign, percent sign, or backslash.
 * Returns 1 if math mode ends here (a "$" or "$$", a backslash
 * followed by TEXRIGHTPAREN or TEXRIGHTSQUARE, a switch back to LR
 * mode as reported by TeX_LR_begin, or an "\end" of a math
 * environment as reported by TeX_math_check); returns 0 otherwise.
 * A percent sign starts a TeX comment (TeX_comment is set), and
 * nothing is examined while a comment is already active.
 *
 * *bufp is advanced over whatever was examined.
 */
/* must check for \begin{mbox} or whatever makes new text region. */
int TeX_math_end (bufp)
    char **	bufp;
    {
    register char *	cp;

    if (TeX_comment)
	return 0;

    cp = *bufp;
    if (*cp == TEXDOLLAR)
	{
	/* Treat a doubled dollar sign (display math) as one unit */
	if (cp[1] == TEXDOLLAR)
	    *bufp = cp + 1;
	return 1;
	}
    if (*cp == TEXPERCENT)
	{
	TeX_comment = 1;
	return 0;
	}

    /* processing extended TeX command: step over the backslash */
    cp = ++(*bufp);
    if (*cp == TEXRIGHTPAREN  ||  *cp == TEXRIGHTSQUARE)
	return 1;
    if (TeX_LR_begin (bufp))	/* check for switch back to LR mode */
	return 1;
    /* TeX_LR_begin may have moved *bufp, so look at it afresh */
    if (strncmp (*bufp, "end", 3) != 0)
	return 0;
    /* find environment that is ending */
    return TeX_math_check ('e', bufp);
    }

/*
 * TeX_math_begin - decide whether the construct at *bufp starts math
 * mode, skipping TeX commands and tib(1) references on the way.
 *
 * Returns 1 when math mode begins ("$" or "$$", a backslash followed
 * by TEXLEFTPAREN or TEXLEFTSQUARE, or a "\begin" of a math
 * environment as reported by TeX_math_check); returns 0 otherwise.
 * Any other backslash command is handed to TeX_skip_check.
 * *bufp is advanced over whatever was examined.
 */
int TeX_math_begin (bufp)
    char **	bufp;
    {
    register char	prev;

    if (**bufp == TEXDOLLAR)
	{
	/* Treat a doubled dollar sign (display math) as one unit */
	if ((*bufp)[1] == TEXDOLLAR)
	    ++(*bufp);
	return 1;
	}

    while (**bufp == TEXBACKSLASH)
	{
	++(*bufp); /* check for null char here? */
	if (**bufp == TEXLEFTPAREN  ||  **bufp == TEXLEFTSQUARE)
	    return 1;
	if (strncmp (*bufp, "begin", 5) != 0)
	    {
	    /* Some other command:  skip it and its arguments */
	    TeX_skip_check (bufp);
	    return 0;
	    }
	if (TeX_math_check ('b', bufp))
	    return 1;
	/* Not a math environment:  back up one and look again */
	--(*bufp);
	}

    /*
     * Ignore references for the tib (1) bibliography system, that
     * is, text between a ``[.'' or ``<.'' and ``.]'' or ``.>''.
     * The opening and closing brackets need not be of the same kind.
     *
     * The whole reference must be on one line:  if the closing
     * sequence is not found, the scan stops at the end of the line
     * and the rest of the reference gets checked on the next line.
     */
    if (**bufp != TEXLEFTSQUARE  &&  **bufp != TEXLEFTANGLE)
	return 0;
    if ((*bufp)[1] != TEXDOT)
	return 0;

    for (++(*bufp);  **bufp != '\0';  )
	{
	prev = *(*bufp)++;
	if (prev == TEXDOT
	  &&  (**bufp == TEXRIGHTSQUARE  ||  **bufp == TEXRIGHTANGLE))
	    return TeX_math_begin (bufp);
	}
    return 0;
    }

/*
 * TeX_LR_begin - check whether the command name at *bufp (the text
 * just after a backslash) switches from math mode back to LR mode.
 *
 * For mbox, makebox, fbox, framebox, parbox and raisebox, math_mode
 * is raised by 2, *bufp is moved to the opening curly brace of the
 * text argument (or to the end of the line), and 1 is returned.
 * parbox and raisebox have one extra braced argument to step over
 * first; if the line ends before it, LaTeX_Mode is set to 'r'.
 * For "begin" the decision is delegated to TeX_LR_check.
 * Anything else returns 0 with *bufp untouched.
 */
int TeX_LR_begin (bufp)
    char **	bufp;
    {
    register char *	cmd = *bufp;

    if (strncmp (cmd, "mbox", 4) == 0
      ||  strncmp (cmd, "makebox", 7) == 0
      ||  strncmp (cmd, "fbox", 4) == 0
      ||  strncmp (cmd, "framebox", 8) == 0)
	{
	math_mode += 2;
	/* skip tex command name and optional or width arguments. */
	TeX_open_paren (bufp);
	return 1;
	}

    if (strncmp (cmd, "parbox", 6) == 0
      ||  strncmp (cmd, "raisebox", 8) == 0)
	{
	math_mode += 2;
	TeX_open_paren (bufp);
	if (**bufp == '\0')
	    LaTeX_Mode = 'r'; /* same as reference -- skip {} */
	else
	    (*bufp)++;
	/* Now move on to the brace that opens the text argument */
	TeX_open_paren (bufp);
	return 1;
	}

    if (strncmp (cmd, "begin", 5) == 0)
	return TeX_LR_check (1, bufp);	/* minipage */

    return 0;
    }

/*
 * TeX_LR_check - handle \begin{minipage} / \end{minipage}.
 *
 * begin_p is nonzero for a \begin and zero for an \end.  *bufp is
 * moved forward to the next opening curly brace.  If the line ends
 * first, LaTeX_Mode is set to 'm' so that the check is retried on
 * the next line, and 0 is returned.  Otherwise LaTeX_Mode becomes
 * 'P'.
 *
 * If the environment is "minipage", its braces are skipped,
 * math_mode is adjusted (including the minipage marker kept in the
 * bits above the low seven), and 1 is returned.  For any other
 * environment 0 is returned with *bufp left on the opening brace.
 */
int TeX_LR_check (begin_p, bufp)
    int		begin_p;
    char **	bufp;
    {

    TeX_open_paren (bufp);
    if (**bufp == '\0')	/* { */
	{
	LaTeX_Mode = 'm';
	return 0;	/* remain in math mode until '}' encountered. */
	}
    LaTeX_Mode = 'P';

    /* The environment name starts right after the opening brace */
    if (strncmp (*bufp + 1, "minipage", 8) != 0)
	return 0;

    (*bufp)++;
    TeX_skip_parens (bufp);
    if (**bufp != '\0')
	(*bufp)++;

    if (!begin_p)
	{
	/* Leaving the minipage:  take the marker back out */
	math_mode -= (math_mode & 127) * 128;
	if (math_mode < 0)
	    {
	    (void) fprintf (stderr, DEFMT_C_LR_MATH_ERROR);
	    math_mode = 1;
	    }
	return 1;
	}

    TeX_skip_parens (bufp); /* now skip opt. args if on this line. */
    math_mode += 2;
    /* indicate minipage mode. */
    math_mode += ((math_mode & 127) - 1) * 128;
    return 1;
    }

/*
 * TeX_skip_args - skip the rest of a \begin{ARG} and the required
 * {PARAM} arguments that follow it, as far as they are on this line.
 *
 * On entry *bufp points at the environment name.  "tabular" and
 * "minipage" take one required argument, "tabular*" takes two.
 * With no arguments or one argument, *bufp ends up just past the
 * last closing brace skipped; with two, it ends up on the closing
 * brace of the second argument.  Scanning stops as soon as the end
 * of the line is reached.
 */
void TeX_skip_args (bufp)
    char **	bufp;
    {
    register int nparams;	/* Required {PARAM}s to skip; at most 2 */

    if (strncmp(*bufp, "tabular*", 8) == 0)
	nparams = 2;
    else if (strncmp(*bufp, "tabular", 7) == 0
      ||  strncmp(*bufp, "minipage", 8) == 0)
	nparams = 1;
    else
	nparams = 0;

    TeX_skip_parens (bufp);	/* Skip to the end of the \begin{} parens */
    if (**bufp == '\0')
	return;
    (*bufp)++;
    if (nparams == 0)
	return;

    TeX_skip_parens (bufp);	/* skip 1st {PARAM}. */
    if (**bufp == '\0')
	return;
    (*bufp)++;
    if (nparams == 2)
	TeX_skip_parens (bufp);	/* skip to end of 2nd {PARAM}. */
    }

/*
 * TeX_math_check - check whether a \begin{...} or \end{...} names a
 * math-like environment whose contents are not spell-checked.
 *
 * cont_char is 'b' for a \begin and 'e' for an \end.  *bufp is moved
 * forward to the next opening curly brace.  If the line ends first,
 * LaTeX_Mode is set to cont_char so the check can be continued on
 * the next line, and 0 is returned.  Otherwise LaTeX_Mode becomes
 * 'P'.
 *
 * Returns 1, with *bufp on the closing brace, when the environment
 * name starts with equation, eqnarray, displaymath, array, picture,
 * math, or (if IGNOREBIB is defined) thebibliography.  Returns 0
 * otherwise, after skipping the braces (and, for a \begin, the
 * arguments handled by TeX_skip_args).
 */
int TeX_math_check (cont_char, bufp)
    char	cont_char;
    char **	bufp;
    {
    register char *	env;

    TeX_open_paren (bufp);
    /* Check for end of line, continue later. */
    if (**bufp == '\0')
	{
	LaTeX_Mode = cont_char;
	return 0;
	}
    LaTeX_Mode = 'P';

    /* Step over the opening brace onto the environment name */
    env = ++(*bufp);
    if (strncmp (env, "equation", 8) == 0
      ||  strncmp (env, "eqnarray", 8) == 0
      ||  strncmp (env, "displaymath", 11) == 0
      ||  strncmp (env, "array", 5) == 0
      ||  strncmp (env, "picture", 7) == 0
#ifdef IGNOREBIB
      ||  strncmp (env, "thebibliography", 15) == 0
#endif
      ||  strncmp (env, "math", 4) == 0)
	{
	(*bufp)--;
	TeX_skip_parens (bufp);
	return 1;
	}

    if (cont_char != 'b')
	TeX_skip_parens (bufp);
    else
	TeX_skip_args (bufp);
    return 0;
    }

/*
 * TeX_skip_parens - advance *bufp to the next TEXRIGHTCURLY, or to
 * the end of the line if there is none.  No value is returned.
 */
TeX_skip_parens (bufp)
    char **	bufp;
    {
    register char *	cp;

    for (cp = *bufp;  *cp != '\0'  &&  *cp != TEXRIGHTCURLY;  cp++)
	;
    *bufp = cp;
    }

/*
 * TeX_open_paren - advance *bufp to the next TEXLEFTCURLY, or to the
 * end of the line if there is none.  No value is returned.
 */
TeX_open_paren (bufp)
    char **	bufp;
    {
    register char *	cp;

    for (cp = *bufp;  *cp != '\0'  &&  *cp != TEXLEFTCURLY;  cp++)
	;
    *bufp = cp;
    }

/*
 * TeX_skip_check - skip a TeX command whose name starts at *bufp
 * (the character after the backslash).
 *
 * Commands whose braced argument is not text to be spell-checked
 * (\end, \cite, \ref, \label, ...) have that argument skipped:
 * *bufp is advanced to the closing curly brace.  \rule has two such
 * arguments.  If the line ends before the closing brace is found,
 * LaTeX_Mode is set to 'r' so that skiptoword() continues the skip
 * on the next line.
 *
 * For every other command only the command name is skipped; its
 * arguments remain subject to checking.
 *
 * The names are compared as prefixes, so e.g. "ref" also matches
 * any longer command that starts with those letters.
 *
 * No value is returned.
 */
TeX_skip_check (bufp)
    char **	bufp;
    {
    int		charlen;	/* Set as a side effect of l1_isstringch */

    /* ADDITIONALLY, MAY WANT TO ADD:
     * input, include, includeonly,
     * documentstyle, pagestyle, pagenumbering
     * WITH TWO {} {}'S TO SKIP:
     * setcounter, addtocounter,
     * setlength, addtolength, settowidth
     */

    /* Commands with a single braced argument to skip */
    if (strncmp(*bufp, "end", 3) == 0
      ||  strncmp(*bufp, "vspace", 6) == 0
      ||  strncmp(*bufp, "hspace", 6) == 0
      ||  strncmp(*bufp, "cite", 4) == 0
      ||  strncmp(*bufp, "ref", 3) == 0
      ||  strncmp(*bufp, "parbox", 6) == 0
      ||  strncmp(*bufp, "label", 5) == 0
      ||  strncmp(*bufp, "input", 5) == 0
      ||  strncmp(*bufp, "nocite", 6) == 0
      ||  strncmp(*bufp, "include", 7) == 0
      ||  strncmp(*bufp, "includeonly", 11) == 0
      ||  strncmp(*bufp, "documentstyle", 13) == 0
#ifndef IGNOREBIB
      ||  strncmp(*bufp, "bibliography", 12) == 0
      ||  strncmp(*bufp, "bibitem", 7) == 0
#endif
      ||  strncmp(*bufp, "hyphenation", 11) == 0
      ||  strncmp(*bufp, "pageref", 7) == 0)
	{
	TeX_skip_parens (bufp);
	if (**bufp == 0)
	    LaTeX_Mode = 'r';	/* Argument continues on the next line */
	}
    else if (strncmp(*bufp, "rule", 4) == 0)	/* skip two args. */
	{
	TeX_skip_parens (bufp);
	if (**bufp == 0)	/* Only skips one {} if not on same line. */
	    LaTeX_Mode = 'r';
	else			/* Skip second arg. */
	    {
	    (*bufp)++;
	    TeX_skip_parens (bufp);
	    if (**bufp == 0)
		LaTeX_Mode = 'r';
	    }
	}
    else
	{
	/* Optional tex arguments sometimes should and
	** sometimes shouldn't be checked
	** (eg \section [C Programming] {foo} vs
	**     \rule [3em] {0.015in} {5em})
	** SO -- we'll just ignore it rather than make a
	** full LaTeX parser.
	*/

	/* Must look at the space after the command. */
	/*
	** Walk along the command name and stop ON its last
	** character, i.e. when the character after the current one
	** is neither a string character nor a word character.
	**
	** NOTE(review): charlen is only ever assigned by the
	** l1_isstringch macro, which is defined outside this file;
	** presumably it sets charlen to 1 when no string character
	** matches -- confirm in ispell.h.
	*/
	while (**bufp
	  && (l1_isstringch (*bufp, charlen, 0)
	    ||  iswordch (chartoichar (**bufp))))
	    {
	    if (!isstringch (*bufp + charlen, 0)
	      &&  !iswordch (chartoichar ((*bufp)[charlen])))
		break;
	    *bufp += charlen;
	    }
	}
    }
