/*
* prop0.c
* Copyright (C) 2002-2004 A.J. van Os; Released under GNU GPL
*
* Description:
* Read the property information from a Word for DOS file
*/
#include <string.h>
#include <time.h>
#include "antiword.h"
/*
* tConvertDosDate - convert DOS date format
*
* returns Unix time_t or -1
*/
static time_t
tConvertDosDate(const char *szDosDate)
{
struct tm tTime;
const char *pcTmp;
time_t tResult;
memset(&tTime, 0, sizeof(tTime));
pcTmp = szDosDate;
/* Get the month */
if (!isdigit(*pcTmp)) {
return (time_t)-1;
}
tTime.tm_mon = (int)(*pcTmp - '0');
pcTmp++;
if (isdigit(*pcTmp)) {
tTime.tm_mon *= 10;
tTime.tm_mon += (int)(*pcTmp - '0');
pcTmp++;
}
/* Get the first separater */
if (isalnum(*pcTmp)) {
return (time_t)-1;
}
pcTmp++;
/* Get the day */
if (!isdigit(*pcTmp)) {
return (time_t)-1;
}
tTime.tm_mday = (int)(*pcTmp - '0');
pcTmp++;
if (isdigit(*pcTmp)) {
tTime.tm_mday *= 10;
tTime.tm_mday += (int)(*pcTmp - '0');
pcTmp++;
}
/* Get the second separater */
if (isalnum(*pcTmp)) {
return (time_t)-1;
}
pcTmp++;
/* Get the year */
if (!isdigit(*pcTmp)) {
return (time_t)-1;
}
tTime.tm_year = (int)(*pcTmp - '0');
pcTmp++;
if (isdigit(*pcTmp)) {
tTime.tm_year *= 10;
tTime.tm_year += (int)(*pcTmp - '0');
pcTmp++;
}
/* Check the values */
if (tTime.tm_mon == 0 || tTime.tm_mday == 0 || tTime.tm_mday > 31) {
return (time_t)-1;
}
/* Correct the values */
tTime.tm_mon--; /* From 01-12 to 00-11 */
if (tTime.tm_year < 80) {
tTime.tm_year += 100; /* 00 means 2000 is 100 */
}
tTime.tm_isdst = -1;
tResult = mktime(&tTime);
NO_DBG_MSG(ctime(&tResult));
return tResult;
} /* end of tConvertDosDate */
/*
* Build the lists with Document Property Information for Word for DOS files
*/
void
vGet0DopInfo(FILE *pFile, const UCHAR *aucHeader)
{
document_block_type tDocument;
UCHAR *aucBuffer;
ULONG ulBeginSumdInfo, ulBeginNextBlock;
size_t tLen;
USHORT usOffset;
tDocument.ucHdrFtrSpecification = 0;
tDocument.usDefaultTabWidth = usGetWord(0x70, aucHeader); /* dxaTab */
tDocument.tCreateDate = (time_t)-1;
tDocument.tRevisedDate = (time_t)-1;
ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
DBG_HEX(ulBeginSumdInfo);
ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
DBG_HEX(ulBeginNextBlock);
if (ulBeginSumdInfo < ulBeginNextBlock && ulBeginNextBlock != 0) {
/* There is a summary information block */
tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
aucBuffer = xmalloc(tLen);
/* Read the summary information block */
if (bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
usOffset = usGetWord(12, aucBuffer);
if (aucBuffer[usOffset] != 0) {
NO_DBG_STRN(aucBuffer + usOffset, 8);
tDocument.tRevisedDate =
tConvertDosDate((char *)aucBuffer + usOffset);
}
usOffset = usGetWord(14, aucBuffer);
if (aucBuffer[usOffset] != 0) {
NO_DBG_STRN(aucBuffer + usOffset, 8);
tDocument.tCreateDate =
tConvertDosDate((char *)aucBuffer + usOffset);
}
}
aucBuffer = xfree(aucBuffer);
}
vCreateDocumentInfoList(&tDocument);
} /* end of vGet0DopInfo */
/*
* Fill the section information block with information
* from a Word for DOS file.
*/
static void
vGet0SectionInfo(const UCHAR *aucGrpprl, size_t tBytes,
section_block_type *pSection)
{
USHORT usCcol;
UCHAR ucTmp;
fail(aucGrpprl == NULL || pSection == NULL);
if (tBytes < 2) {
return;
}
/* bkc */
ucTmp = ucGetByte(1, aucGrpprl);
DBG_HEX(ucTmp);
ucTmp &= 0x07;
DBG_HEX(ucTmp);
pSection->bNewPage = ucTmp != 0 && ucTmp != 1;
if (tBytes < 18) {
return;
}
/* ccolM1 */
usCcol = (USHORT)ucGetByte(17, aucGrpprl);
DBG_DEC(usCcol);
} /* end of vGet0SectionInfo */
/*
* Build the lists with Section Property Information for Word for DOS files
*/
void
vGet0SepInfo(FILE *pFile, const UCHAR *aucHeader)
{
section_block_type tSection;
UCHAR *aucBuffer;
ULONG ulBeginOfText, ulTextOffset, ulBeginSectInfo;
ULONG ulCharPos, ulSectPage, ulBeginNextBlock;
size_t tSectInfoLen, tIndex, tSections, tBytes;
UCHAR aucTmp[2], aucFpage[35];
fail(pFile == NULL || aucHeader == NULL);
ulBeginOfText = 128;
NO_DBG_HEX(ulBeginOfText);
ulBeginSectInfo = 128 * (ULONG)usGetWord(0x18, aucHeader);
DBG_HEX(ulBeginSectInfo);
ulBeginNextBlock = 128 * (ULONG)usGetWord(0x1a, aucHeader);
DBG_HEX(ulBeginNextBlock);
if (ulBeginSectInfo == ulBeginNextBlock) {
/* There is no section information block */
return;
}
/* Get the the number of sections */
if (!bReadBytes(aucTmp, 2, ulBeginSectInfo, pFile)) {
return;
}
tSections = (size_t)usGetWord(0, aucTmp);
NO_DBG_DEC(tSections);
/* Read the Section Descriptors */
tSectInfoLen = 10 * tSections;
NO_DBG_DEC(tSectInfoLen);
aucBuffer = xmalloc(tSectInfoLen);
if (!bReadBytes(aucBuffer, tSectInfoLen, ulBeginSectInfo + 4, pFile)) {
aucBuffer = xfree(aucBuffer);
return;
}
NO_DBG_PRINT_BLOCK(aucBuffer, tSectInfoLen);
/* Read the Section Properties */
for (tIndex = 0; tIndex < tSections; tIndex++) {
ulTextOffset = ulGetLong(10 * tIndex, aucBuffer);
NO_DBG_HEX(ulTextOffset);
ulCharPos = ulBeginOfText + ulTextOffset;
NO_DBG_HEX(ulTextOffset);
ulSectPage = ulGetLong(10 * tIndex + 6, aucBuffer);
NO_DBG_HEX(ulSectPage);
if (ulSectPage == FC_INVALID || /* Must use defaults */
ulSectPage < 128 || /* Should not happen */
ulSectPage >= ulBeginSectInfo) { /* Should not happen */
DBG_HEX_C(ulSectPage != FC_INVALID, ulSectPage);
vDefault2SectionInfoList(ulCharPos);
continue;
}
/* Get the number of bytes to read */
if (!bReadBytes(aucTmp, 1, ulSectPage, pFile)) {
continue;
}
tBytes = 1 + (size_t)ucGetByte(0, aucTmp);
NO_DBG_DEC(tBytes);
if (tBytes > sizeof(aucFpage)) {
DBG_DEC(tBytes);
tBytes = sizeof(aucFpage);
}
/* Read the bytes */
if (!bReadBytes(aucFpage, tBytes, ulSectPage, pFile)) {
continue;
}
NO_DBG_PRINT_BLOCK(aucFpage, tBytes);
/* Process the bytes */
vGetDefaultSection(&tSection);
vGet0SectionInfo(aucFpage + 1, tBytes - 1, &tSection);
vAdd2SectionInfoList(&tSection, ulCharPos);
}
/* Clean up before you leave */
aucBuffer = xfree(aucBuffer);
} /* end of vGet0SepInfo */
/*
* Fill the style information block with information
* from a Word for DOS file.
*/
static void
vGet0StyleInfo(int iFodo, const UCHAR *aucGrpprl, style_block_type *pStyle)
{
int iBytes;
UCHAR ucTmp;
fail(iFodo <= 0 || aucGrpprl == NULL || pStyle == NULL);
pStyle->usIstdNext = ISTD_NORMAL;
iBytes = (int)ucGetByte(iFodo, aucGrpprl);
if (iBytes < 1) {
return;
}
/* stc if styled */
ucTmp = ucGetByte(iFodo + 1, aucGrpprl);
if ((ucTmp & BIT(0)) != 0) {
ucTmp >>= 1;
if (ucTmp >= 88 && ucTmp <= 94) {
/* Header levels 1 through 7 */
pStyle->usIstd = ucTmp - 87;
pStyle->ucNumLevel = 1;
}
}
if (iBytes < 2) {
return;
}
/* jc */
ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
pStyle->ucAlignment = ucTmp & 0x02;
if (iBytes < 3) {
return;
}
/* stc */
ucTmp = ucGetByte(iFodo + 3, aucGrpprl);
ucTmp &= 0x7f;
if (ucTmp >= 88 && ucTmp <= 94) {
/* Header levels 1 through 7 */
pStyle->usIstd = ucTmp - 87;
pStyle->ucNumLevel = 1;
}
if (iBytes < 6) {
return;
}
/* dxaRight */
pStyle->sRightIndent = (short)usGetWord(iFodo + 5, aucGrpprl);
NO_DBG_DEC(pStyle->sRightIndent);
if (iBytes < 8) {
return;
}
/* dxaLeft */
pStyle->sLeftIndent = (short)usGetWord(iFodo + 7, aucGrpprl);
NO_DBG_DEC(pStyle->sLeftIndent);
if (iBytes < 10) {
return;
}
/* dxaLeft1 */
pStyle->sLeftIndent1 = (short)usGetWord(iFodo + 9, aucGrpprl);
NO_DBG_DEC(pStyle->sLeftIndent1);
if (iBytes < 14) {
return;
}
/* dyaBefore */
pStyle->usBeforeIndent = usGetWord(iFodo + 13, aucGrpprl);
NO_DBG_DEC(pStyle->usBeforeIndent);
if (iBytes < 16) {
return;
}
/* dyaAfter */
pStyle->usAfterIndent = usGetWord(iFodo + 15, aucGrpprl);
NO_DBG_DEC(pStyle->usAfterIndent);
} /* end of vGet0StyleInfo */
/*
* Build the lists with Paragraph Information for Word for DOS files
*/
void
vGet0PapInfo(FILE *pFile, const UCHAR *aucHeader)
{
style_block_type tStyle;
ULONG ulBeginParfInfo, ulCharPos, ulCharPosNext;
int iIndex, iRun, iFodo;
UCHAR aucFpage[128];
fail(pFile == NULL || aucHeader == NULL);
ulBeginParfInfo = 128 * (ULONG)usGetWord(0x12, aucHeader);
NO_DBG_HEX(ulBeginParfInfo);
do {
if (!bReadBytes(aucFpage, 128, ulBeginParfInfo, pFile)) {
return;
}
NO_DBG_PRINT_BLOCK(aucFpage, 128);
ulCharPosNext = ulGetLong(0, aucFpage);
iRun = (int)ucGetByte(0x7f, aucFpage);
NO_DBG_DEC(iRun);
for (iIndex = 0; iIndex < iRun; iIndex++) {
iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
if (iFodo <= 0 || iFodo > 0x79) {
DBG_DEC_C(iFodo != (int)0xffff, iFodo);
continue;
}
vFillStyleFromStylesheet(0, &tStyle);
vGet0StyleInfo(iFodo, aucFpage + 4, &tStyle);
ulCharPos = ulCharPosNext;
ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
tStyle.ulFileOffset = ulCharPos;
vAdd2StyleInfoList(&tStyle);
}
ulBeginParfInfo += 128;
} while (ulCharPosNext == ulBeginParfInfo);
} /* end of vGet0PapInfo */
/*
* Fill the font information block with information
* from a Word for DOS file.
*/
static void
vGet0FontInfo(int iFodo, const UCHAR *aucGrpprl, font_block_type *pFont)
{
int iBytes;
UCHAR ucTmp;
fail(iFodo <= 0 || aucGrpprl == NULL || pFont == NULL);
iBytes = (int)ucGetByte(iFodo, aucGrpprl);
if (iBytes < 2) {
return;
}
/* fBold, fItalic, cFtc */
ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
if ((ucTmp & BIT(0)) != 0) {
pFont->usFontStyle |= FONT_BOLD;
}
if ((ucTmp & BIT(1)) != 0) {
pFont->usFontStyle |= FONT_ITALIC;
}
pFont->ucFontNumber = ucTmp >> 2;
NO_DBG_DEC(pFont->ucFontNumber);
if (iBytes < 3) {
return;
}
/* cHps */
pFont->usFontSize = (USHORT)ucGetByte(iFodo + 3, aucGrpprl);
NO_DBG_DEC(pFont->usFontSize);
if (iBytes < 4) {
return;
}
/* cKul, fStrike, fCaps, fSmallCaps, fVanish */
ucTmp = ucGetByte(iFodo + 4, aucGrpprl);
if ((ucTmp & BIT(0)) != 0 || (ucTmp & BIT(2)) != 0) {
pFont->usFontStyle |= FONT_UNDERLINE;
}
if ((ucTmp & BIT(1)) != 0) {
pFont->usFontStyle |= FONT_STRIKE;
}
if ((ucTmp & BIT(4)) != 0) {
pFont->usFontStyle |= FONT_CAPITALS;
}
if ((ucTmp & BIT(5)) != 0) {
pFont->usFontStyle |= FONT_SMALL_CAPITALS;
}
if ((ucTmp & BIT(7)) != 0) {
pFont->usFontStyle |= FONT_HIDDEN;
}
DBG_HEX(pFont->usFontStyle);
if (iBytes < 6) {
return;
}
/* cIss */
ucTmp = ucGetByte(iFodo + 6, aucGrpprl);
if (ucTmp != 0) {
if (ucTmp < 128) {
pFont->usFontStyle |= FONT_SUPERSCRIPT;
DBG_MSG("Superscript");
} else {
pFont->usFontStyle |= FONT_SUBSCRIPT;
DBG_MSG("Subscript");
}
}
if (iBytes < 7) {
return;
}
/* cIco */
ucTmp = ucGetByte(iFodo + 7, aucGrpprl);
switch (ucTmp & 0x07) {
case 0: pFont->ucFontColor = FONT_COLOR_BLACK; break;
case 1: pFont->ucFontColor = FONT_COLOR_RED; break;
case 2: pFont->ucFontColor = FONT_COLOR_GREEN; break;
case 3: pFont->ucFontColor = FONT_COLOR_BLUE; break;
case 4: pFont->ucFontColor = FONT_COLOR_CYAN; break;
case 5: pFont->ucFontColor = FONT_COLOR_MAGENTA; break;
case 6: pFont->ucFontColor = FONT_COLOR_YELLOW; break;
case 7: pFont->ucFontColor = FONT_COLOR_WHITE; break;
default:pFont->ucFontColor = FONT_COLOR_BLACK; break;
}
NO_DBG_DEC(pFont->ucFontColor);
} /* end of vGet0FontInfo */
/*
* Build the lists with Character Information for Word for DOS files
*/
void
vGet0ChrInfo(FILE *pFile, const UCHAR *aucHeader)
{
font_block_type tFont;
ULONG ulBeginCharInfo, ulCharPos, ulCharPosNext;
int iIndex, iRun, iFodo;
UCHAR aucFpage[128];
fail(pFile == NULL || aucHeader == NULL);
ulBeginCharInfo = ulGetLong(0x0e, aucHeader);
NO_DBG_HEX(ulBeginCharInfo);
ulBeginCharInfo = ROUND128(ulBeginCharInfo);
NO_DBG_HEX(ulBeginCharInfo);
do {
if (!bReadBytes(aucFpage, 128, ulBeginCharInfo, pFile)) {
return;
}
NO_DBG_PRINT_BLOCK(aucFpage, 128);
ulCharPosNext = ulGetLong(0, aucFpage);
iRun = (int)ucGetByte(0x7f, aucFpage);
NO_DBG_DEC(iRun);
for (iIndex = 0; iIndex < iRun; iIndex++) {
iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
if (iFodo <= 0 || iFodo > 0x79) {
DBG_DEC_C(iFodo != (int)0xffff, iFodo);
continue;
}
vFillFontFromStylesheet(0, &tFont);
vGet0FontInfo(iFodo, aucFpage + 4, &tFont);
ulCharPos = ulCharPosNext;
ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
tFont.ulFileOffset = ulCharPos;
vAdd2FontInfoList(&tFont);
}
ulBeginCharInfo += 128;
} while (ulCharPosNext == ulBeginCharInfo);
} /* end of vGet0ChrInfo */
|