Plan 9 from Bell Labs’s /usr/web/sources/contrib/steve/root/sys/src/cmd/rtf2txt/rtfreader.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


/* rtfreadr.c */

#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "rtftype.h"
#include "rtfdecl.h"

// Parser state shared by the routines in this file.
int cGroup;		// group nesting depth: incremented by ecPushRtfState, decremented by ecPopRtfState
bool fSkipDestIfUnk;	// not referenced in the code visible here; NOTE(review): presumably used by the keyword dispatcher -- confirm
long cbBin;		// counted down by ecParseChar for each character seen while ris == risBin
long lParam;		// long value of the latest numeric keyword parameter (set in ecParseRtfKeyword)
RDS rds;		// destination state; ecParseChar prints when it is rdsNorm and drops the character otherwise
RIS ris;		// reader state: risNorm, risBin or risHex
CHP chp;		// chp, pap, sep and dop are saved on '{' and restored on '}'
PAP pap;
SEP sep;
DOP dop;
SAVE *psave;		// top of the linked list of saved group states
Biobuf bout;		// buffered output; main attaches it to file descriptor 1

//
// Diagnostic text for the ec* status codes, indexed by code.
// main prints errmsg[ec] on standard error when ecRtfParse fails.
// ecEndOfFile is returned by ecParseRtfKeyword when input ends in the
// middle of a control word.
//
char *errmsg[] = {
	[ecOK] 			"ok",
	[ecStackUnderflow]	"unmatched }",
	[ecStackOverflow]	"Too many {",
	[ecUnmatchedBrace]	"RTF ended in an open group",
	[ecInvalidHex]		"invalid hex char in data",
	[ecBadTable]		"RTF table (sym or prop) invalid",
	[ecAssertion]		"Assertion failed",
	[ecEndOfFile]		"unexpected end of file"
};

//
// %%Function: parseone
//
// Run ecRtfParse over one input stream and report any failure on
// standard error.  Returns 0 on success and 1 on failure, so the result
// can be added straight into an error count.
//
static int
parseone(Biobuf *bp)
{
	int ec;

	ec = ecRtfParse(bp);
	if (ec == ecOK)
		return 0;
	// Codes outside the message table get a generic report.
	if (ec < 0 || ec >= sizeof(errmsg) / sizeof(errmsg[0]))
		fprint(2, "unknown error %d\n", ec);
	else
		fprint(2, "%s\n", errmsg[ec]);
	return 1;
}

//
// %%Function: main
//
// With no arguments, convert RTF read from file descriptor 0; otherwise
// convert each named file in turn.  Output goes through bout on file
// descriptor 1.  A file that cannot be opened or parsed is reported and
// counted, and processing continues with the next one.
// Exits with a nil status when nothing failed, "failed" otherwise.
//
// NOTE(review): the parser keeps its state in file-scope globals
// (cGroup, ris, rds, psave, ...) and nothing here resets them between
// files, so a file that fails part-way appears to leave its group stack
// and reader state behind for the next file -- confirm.
//
void
main(int argc, char *argv[])
{
	Biobuf bin, *bp;
	int nerr;
	int i;

	nerr = 0;
	Binit(&bin, 0, OREAD);
	Binit(&bout, 1, OWRITE);

	if (argc == 1)
		nerr += parseone(&bin);

	for (i = 1; i < argc; i++) {
		bp = Bopen(argv[i], OREAD);
		if (bp == nil) {
			nerr++;
			fprint(2, "%s cannot open file, %r\n", argv[i]);
			continue;
		}
		nerr += parseone(bp);
		Bterm(bp);
	}

	exits((nerr == 0)? nil: "failed");
}

//
// %%Function: ecRtfParse
//
// Step 1:
// Read the stream one character at a time and act on the RTF structure:
//   an opening brace starts a group     -> ecPushRtfState
//   a closing brace ends a group        -> ecPopRtfState
//   a backslash starts a control word   -> ecParseRtfKeyword
//   carriage return and line feed are ignored
//   anything else is text (ris == risNorm) or one hex digit of a byte
//   (ris == risHex); finished characters go to ecParseChar.
// While ris == risBin every character goes straight to ecParseChar
// without being interpreted.
//
// Returns ecOK when the input ends with every group closed,
// ecUnmatchedBrace when it ends inside a group, or the first error
// reported along the way.
//
int
ecRtfParse(Biobuf *bp)
{
	int c;
	int err;
	int digit;
	int nibblesleft = 2;	// hex digits still needed to finish the current byte
	int byte = 0;		// byte being assembled from hex digits

	for (;;) {
		c = Bgetc(bp);
		if (c == -1)
			break;
		if (cGroup < 0)
			return ecStackUnderflow;

		// Binary data is handled directly: nothing in it is markup.
		if (ris == risBin) {
			err = ecParseChar(c);
			if (err != ecOK)
				return err;
			continue;
		}

		if (c == '{')
			err = ecPushRtfState();
		else if (c == '}')
			err = ecPopRtfState();
		else if (c == '\\')
			err = ecParseRtfKeyword(bp);
		else if (c == 0x0d || c == 0x0a)
			err = ecOK;		// cr and lf are noise characters
		else if (ris == risNorm)
			err = ecParseChar(c);
		else if (ris != risHex)
			err = ecAssertion;	// no other reader state can reach here
		else {
			// One hex digit of a two-digit byte.
			if (c >= '0' && c <= '9')
				digit = c - '0';
			else if (c >= 'a' && c <= 'f')
				digit = c - 'a' + 10;
			else if (c >= 'A' && c <= 'F')
				digit = c - 'A' + 10;
			else
				return ecInvalidHex;
			byte = (byte << 4) + digit;
			err = ecOK;
			if (--nibblesleft == 0) {
				err = ecParseChar(byte);
				if (err != ecOK)
					return err;
				// Byte delivered: back to ordinary text.
				nibblesleft = 2;
				byte = 0;
				ris = risNorm;
			}
		}
		if (err != ecOK)
			return err;
	}

	if (cGroup < 0)
		return ecStackUnderflow;
	if (cGroup > 0)
		return ecUnmatchedBrace;
	return ecOK;
}
//
// %%Function: ecPushRtfState
//
// Save relevant info on a linked list of SAVE structures.
//
int
ecPushRtfState(void)
{
    SAVE *psaveNew = malloc(sizeof(SAVE));
    if (!psaveNew)
	return ecStackOverflow;
    psaveNew -> pNext = psave;
    psaveNew -> chp = chp;
    psaveNew -> pap = pap;
    psaveNew -> sep = sep;
    psaveNew -> dop = dop;
    psaveNew -> rds = rds;
    psaveNew -> ris = ris;
    ris = risNorm;
    psave = psaveNew;
    cGroup++;
    return ecOK;
}
//
// %%Function: ecPopRtfState
//
// If we are ending a destination (that is, the destination is changing),
// call ecEndGroupAction.
// Always restore relevant info from the top of the SAVE list.
//
int
ecPopRtfState(void)
{
    SAVE *psaveOld;
    int ec;
    if (!psave)
	return ecStackUnderflow;
    if (rds != psave->rds)
    {
	if ((ec = ecEndGroupAction(rds)) != ecOK)
	    return ec;
    }
    chp = psave->chp;
    pap = psave->pap;
    sep = psave->sep;
    dop = psave->dop;
    rds = psave->rds;
    ris = psave->ris;
    psaveOld = psave;
    psave = psave->pNext;
    cGroup--;
    free(psaveOld);
    return ecOK;
}
//
// %%Function: ecParseRtfKeyword
//
// Step 2:
// Read the control that follows a backslash (the caller has already
// consumed the backslash) and call ecTranslateKeyword to dispatch it.
//
// A control symbol is a single non-alphabetic character; it has no
// parameter and no delimiter.  A control word is a run of letters,
// optionally followed by '-' and a run of decimal digits.  One space
// after a control word is swallowed as its delimiter; any other
// character is pushed back for the caller to read.
//
// Side effect: when a parameter is present, lParam is set to its value
// as a long (negative if a '-' preceded the digits).
//
// Returns ecEndOfFile if the input ends right after the backslash or
// right after the '-', otherwise the result of ecTranslateKeyword.
//
int
ecParseRtfKeyword(Biobuf *bp)
{
	int ch;
	char fParam = fFalse;
	char fNeg = fFalse;
	int param = 0;
	char *pch;
	char szKeyword[30];
	char szParameter[20];

	szKeyword[0] = '\0';
	szParameter[0] = '\0';
	if ((ch = Bgetc(bp)) == -1)
		return ecEndOfFile;
	if (!isalpha(ch)) // a control symbol; no delimiter.
	{
		szKeyword[0] = (char) ch;
		szKeyword[1] = '\0';
		return ecTranslateKeyword(szKeyword, 0, fParam);
	}

	// Collect the letters of the control word.  The input is untrusted:
	// letters that do not fit in szKeyword are consumed and dropped, so
	// the buffer cannot be overrun.
	for (pch = szKeyword; isalpha(ch); ch = Bgetc(bp))
		if (pch < szKeyword + sizeof(szKeyword) - 1)
			*pch++ = (char) ch;
	*pch = '\0';

	if (ch == '-')
	{
		fNeg = fTrue;
		if ((ch = Bgetc(bp)) == -1)
			return ecEndOfFile;
	}
	if (isdigit(ch))
	{
		fParam = fTrue; // a digit after the control means we have a parameter
		// Same bound as above: excess digits are consumed and dropped.
		for (pch = szParameter; isdigit(ch); ch = Bgetc(bp))
			if (pch < szParameter + sizeof(szParameter) - 1)
				*pch++ = (char) ch;
		*pch = '\0';
		param = atoi(szParameter);
		lParam = atol(szParameter);
		if (fNeg)
		{
			// Apply the sign once to each of the two results.
			param = -param;
			lParam = -lParam;
		}
	}
	// At end of input no character was read, so there is nothing to
	// push back.
	if (ch != ' ' && ch != -1)
		Bungetc(bp);
	return ecTranslateKeyword(szKeyword, param, fParam);
}
//
// %%Function: ecParseChar
//
// Deliver one character to the current destination.
// While reading binary data (ris == risBin) each call also counts one
// byte off cbBin and drops back to risNorm once the count is used up.
// Only the rdsNorm destination produces output (via ecPrintChar); every
// other destination, rdsSkip included, discards the character and
// reports ecOK.
//
int
ecParseChar(int ch)
{
	if (ris == risBin) {
		cbBin--;
		if (cbBin <= 0)
			ris = risNorm;
	}

	if (rds == rdsNorm)
		return ecPrintChar(ch);	// properties are valid at this point

	// rdsSkip and any other destination: toss this character.
	return ecOK;
}
//
// %%Function: ecPrintChar
//
// Send a character to the output file: append ch to the buffered output
// stream bout.  Always returns ecOK; the result of Bputc is not checked,
// so a write error is not reported from here.
//
int
ecPrintChar(int ch)
{
    // unfortunately, we do not do a whole lot here as far as layout goes...
    Bputc(&bout, ch);
    return ecOK;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].