/* rtfreadr.c */
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "rtftype.h"
#include "rtfdecl.h"
// Parser state shared by the routines in this file.

// Current group nesting depth: incremented by ecPushRtfState, decremented by ecPopRtfState.
int cGroup;
// Not referenced in this file; presumably tells the keyword translator to skip
// an unknown destination -- TODO confirm against ecTranslateKeyword.
bool fSkipDestIfUnk;
// Counted down by ecParseChar for each character handled while ris == risBin.
long cbBin;
// Long form of the most recent numeric keyword parameter (set in ecParseRtfKeyword).
long lParam;
// Destination state; ecParseChar prints when it is rdsNorm and discards otherwise.
RDS rds;
// Internal reader state (risNorm, risBin or risHex) consulted by ecRtfParse.
RIS ris;
// Property records saved and restored around every group. Presumably character,
// paragraph, section and document properties (types come from rtftype.h) -- confirm.
CHP chp;
PAP pap;
SEP sep;
DOP dop;
// Top of the linked list of saved group states (nil when no group is open).
SAVE *psave;
// Buffered output stream; main attaches it to file descriptor 1.
Biobuf bout;
// Diagnostic text for each parser error code, indexed by the ec* value.
// main bounds-checks the code against this table before indexing it.
char *errmsg[] = {
	[ecOK] "ok",
	[ecStackUnderflow] "unmatched }",
	[ecStackOverflow] "Too many {",
	[ecUnmatchedBrace] "RTF ended in an open group",
	[ecInvalidHex] "invalid hex char in data",
	[ecBadTable] "RTF table (sym or prop) invalid",
	[ecAssertion] "Assertion failed",
	[ecEndOfFile] "unexpected end of file"
};
//
// %%Function: rtfreporterror
//
// Print the diagnostic for a failed parse on file descriptor 2.
// name is the input file name, or nil when the input is standard input;
// ec is the error code returned by ecRtfParse.
//
static void
rtfreporterror(char *name, int ec)
{
	char *msg = nil;

	// errmsg is built with indexed initializers, so an in-range code can
	// still map to an empty (nil) slot; treat that as an unknown error too.
	if (ec >= 0 && ec < sizeof(errmsg) / sizeof(errmsg[0]))
		msg = errmsg[ec];
	if (name != nil)
		fprint(2, "%s: ", name);
	if (msg != nil)
		fprint(2, "%s\n", msg);
	else
		fprint(2, "unknown error %d\n", ec);
}
//
// %%Function: main
//
// Main loop. With no arguments, parse RTF from standard input;
// otherwise parse each named file in turn. Output goes to bout
// (file descriptor 1). Exits with status "failed" if any file could
// not be opened or parsed, and with a nil status otherwise.
//
// NOTE(review): the parser state lives in file-scope variables and is
// not reset here between files, so a file that fails part-way through
// may influence the parse of the next one -- confirm whether that matters.
//
void
main(int argc, char *argv[])
{
	Biobuf bin, *bp;
	int ec;
	int nerr = 0;

	Binit(&bin, 0, OREAD);
	Binit(&bout, 1, OWRITE);
	if (argc == 1) {
		if ((ec = ecRtfParse(&bin)) != ecOK) {
			nerr++;
			rtfreporterror(nil, ec);
		}
	} else {
		for (argv++; *argv; argv++) {
			if ((bp = Bopen(*argv, OREAD)) == nil) {
				nerr++;
				fprint(2, "%s cannot open file, %r\n", *argv);
				continue;
			}
			if ((ec = ecRtfParse(bp)) != ecOK) {
				nerr++;
				// Name the file so failures can be told apart
				// when several inputs are given.
				rtfreporterror(*argv, ec);
			}
			Bterm(bp);
		}
	}
	exits((nerr == 0)? nil: "failed");
}
//
// %%Function: ecRtfParse
//
// Step 1:
// Read the stream one character at a time.
//   '{' and '}' push and pop the group state,
//   '\\' hands control to ecParseRtfKeyword,
//   CR and LF are dropped,
//   everything else is text (ecParseChar) or, in hex state, one
//   nibble of a two-digit hex byte.
// While in binary state every character goes straight to ecParseChar.
// Returns ecOK, or the first error code encountered.
//
int
ecRtfParse(Biobuf *bp)
{
	int ch;
	int ec;
	int digit;
	int nibblesLeft = 2;	// hex digits still needed to complete a byte
	int acc = 0;		// hex byte being assembled

	for (;;) {
		ch = Bgetc(bp);
		if (ch == -1)
			break;
		if (cGroup < 0)
			return ecStackUnderflow;

		ec = ecOK;
		if (ris == risBin) {
			// binary data is passed through untouched
			ec = ecParseChar(ch);
		} else if (ch == '{') {
			ec = ecPushRtfState();
		} else if (ch == '}') {
			ec = ecPopRtfState();
		} else if (ch == '\\') {
			ec = ecParseRtfKeyword(bp);
		} else if (ch == 0x0d || ch == 0x0a) {
			// cr and lf are noise characters
		} else if (ris == risNorm) {
			ec = ecParseChar(ch);
		} else {
			// the only remaining legal state is hex data
			if (ris != risHex)
				return ecAssertion;
			if (ch >= '0' && ch <= '9')
				digit = ch - '0';
			else if (ch >= 'a' && ch <= 'f')
				digit = ch - 'a' + 10;
			else if (ch >= 'A' && ch <= 'F')
				digit = ch - 'A' + 10;
			else
				return ecInvalidHex;
			acc = (acc << 4) + digit;
			nibblesLeft--;
			if (nibblesLeft == 0) {
				// both nibbles seen: emit the byte and leave hex state
				ec = ecParseChar(acc);
				if (ec != ecOK)
					return ec;
				nibblesLeft = 2;
				acc = 0;
				ris = risNorm;
			}
		}
		if (ec != ecOK)
			return ec;
	}

	// end of input: every group must have been closed
	if (cGroup < 0)
		return ecStackUnderflow;
	if (cGroup > 0)
		return ecUnmatchedBrace;
	return ecOK;
}
//
// %%Function: ecPushRtfState
//
// Save relevant info on a linked list of SAVE structures.
//
int
ecPushRtfState(void)
{
SAVE *psaveNew = malloc(sizeof(SAVE));
if (!psaveNew)
return ecStackOverflow;
psaveNew -> pNext = psave;
psaveNew -> chp = chp;
psaveNew -> pap = pap;
psaveNew -> sep = sep;
psaveNew -> dop = dop;
psaveNew -> rds = rds;
psaveNew -> ris = ris;
ris = risNorm;
psave = psaveNew;
cGroup++;
return ecOK;
}
//
// %%Function: ecPopRtfState
//
// If we are ending a destination (that is, the destination is changing),
// call ecEndGroupAction.
// Always restore relevant info from the top of the SAVE list.
//
int
ecPopRtfState(void)
{
SAVE *psaveOld;
int ec;
if (!psave)
return ecStackUnderflow;
if (rds != psave->rds)
{
if ((ec = ecEndGroupAction(rds)) != ecOK)
return ec;
}
chp = psave->chp;
pap = psave->pap;
sep = psave->sep;
dop = psave->dop;
rds = psave->rds;
ris = psave->ris;
psaveOld = psave;
psave = psave->pNext;
cGroup--;
free(psaveOld);
return ecOK;
}
//
// %%Function: ecParseRtfKeyword
//
// Step 2:
// get a control word (and its associated value) and
// call ecTranslateKeyword to dispatch the control.
//
// Called after a backslash has been read from bp. A non-alphabetic
// next character is a one-character control symbol with no delimiter.
// Otherwise a run of letters forms the keyword, optionally followed by
// '-' and a run of digits forming the parameter. A single space after
// the control word is consumed as its delimiter; any other character
// is pushed back.
//
// Side effect: when a parameter is present, the global lParam is set
// to its (signed) long value.
//
// Returns ecEndOfFile if the input ends right after the backslash or
// right after the '-', otherwise whatever ecTranslateKeyword returns.
//
int
ecParseRtfKeyword(Biobuf *bp)
{
	int ch;
	char fParam = fFalse;
	char fNeg = fFalse;
	int param = 0;
	char *pch;
	char szKeyword[30];
	char szParameter[20];

	szKeyword[0] = '\0';
	szParameter[0] = '\0';
	if ((ch = Bgetc(bp)) == -1)
		return ecEndOfFile;
	if (!isalpha(ch)) // a control symbol; no delimiter.
	{
		szKeyword[0] = (char) ch;
		szKeyword[1] = '\0';
		return ecTranslateKeyword(szKeyword, 0, fParam);
	}

	// The input is untrusted: consume the whole run of letters but store
	// only as many as fit, so an over-long keyword cannot overrun the buffer.
	for (pch = szKeyword; ch != -1 && isalpha(ch); ch = Bgetc(bp))
		if (pch < szKeyword + sizeof(szKeyword) - 1)
			*pch++ = (char) ch;
	*pch = '\0';

	if (ch == '-')
	{
		fNeg = fTrue;
		if ((ch = Bgetc(bp)) == -1)
			return ecEndOfFile;
	}
	if (ch != -1 && isdigit(ch))
	{
		fParam = fTrue; // a digit after the control means we have a parameter
		// Same bounded copy as for the keyword.
		for (pch = szParameter; ch != -1 && isdigit(ch); ch = Bgetc(bp))
			if (pch < szParameter + sizeof(szParameter) - 1)
				*pch++ = (char) ch;
		*pch = '\0';
		param = atoi(szParameter);
		lParam = atol(szParameter);
		// Apply the sign exactly once to each form of the value.
		if (fNeg)
		{
			param = -param;
			lParam = -lParam;
		}
	}

	// A space is the delimiter and is swallowed. Anything else belongs to
	// what follows and is pushed back; at end of file there is nothing
	// to push back.
	if (ch != ' ' && ch != -1)
		Bungetc(bp);
	return ecTranslateKeyword(szKeyword, param, fParam);
}
//
// %%Function: ecParseChar
//
// Deliver one character to the current destination.
// In binary state the remaining-byte count cbBin is decremented first,
// and the reader returns to risNorm once the count reaches zero or below.
// Characters are printed when the destination is rdsNorm and silently
// dropped for rdsSkip and every other destination.
//
int
ecParseChar(int ch)
{
	if (ris == risBin)
	{
		cbBin--;
		if (cbBin <= 0)
			ris = risNorm;
	}

	// Only the normal destination produces output; properties are valid here.
	if (rds == rdsNorm)
		return ecPrintChar(ch);

	// rdsSkip tosses the character; other destinations are not handled.
	return ecOK;
}
//
// %%Function: ecPrintChar
//
// Append one character to the buffered output stream bout.
// Always returns ecOK; the result of Bputc is not examined.
//
int
ecPrintChar(int ch)
{
	// no layout is attempted: the character is emitted as is
	Bputc(&bout, ch);
	return ecOK;
}
|