/*
* stylelist.c
* Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
*
* Description:
* Build, read and destroy a list of Word style information
*/
#include <stdlib.h>
#include <stddef.h>
#include <ctype.h>
#include "antiword.h"
/*
 * Private structure to hide the way the information
 * is stored from the rest of the program.
 *
 * Every record is one member of a singly linked list. The functions in
 * this file only hand out pointers to the embedded tInfo, never to the
 * record itself.
 */
typedef struct style_mem_tag {
/* The style information proper; the part that is visible to callers */
style_block_type tInfo;
/* Value of ulGetSeqNumber() for tInfo.ulFileOffset, set on creation */
ULONG ulSequenceNumber;
/* The next record in the list, NULL in the last record */
struct style_mem_tag *pNext;
} style_mem_type;
/*
 * Variables needed to write the Style Information List:
 * pAnchor is the first record (NULL while the list is empty) and
 * pStyleLast is the record that was appended most recently.
 */
static style_mem_type *pAnchor = NULL;
static style_mem_type *pStyleLast = NULL;
/*
 * The type of conversion; fetched with vGetOptions() by
 * vConvertListCharacter() while it still has its initial value
 */
static conversion_type eConversionType = conversion_unknown;
/* The character set encoding; fetched together with the conversion type */
static encoding_type eEncoding = encoding_neutral;
/*
 * Values for efficiency reasons:
 * pMidPtr moves one record forward for every second record appended, so it
 * stays near the middle of the list; usGetIstd() starts searching there
 * when it can.
 * bMoveMidPtr flips on every append after the first and tells whether the
 * next append must advance pMidPtr.
 * bInSequence stays TRUE for as long as every appended record has a larger
 * sequence number than the record before it.
 */
static const style_mem_type *pMidPtr = NULL;
static BOOL bMoveMidPtr = FALSE;
static BOOL bInSequence = TRUE;
/*
 * vDestroyStyleInfoList - release every record of the Style Information List
 *
 * All records are freed and the list bookkeeping (anchor, last record,
 * mid pointer and its flags) is put back to its start-up values. The cached
 * conversion type and encoding are left alone.
 */
void
vDestroyStyleInfoList(void)
{
	style_mem_type	*pDoomed, *pRest;

	DBG_MSG("vDestroyStyleInfoList");

	/* Remember the successor first; the record is gone after xfree */
	for (pDoomed = pAnchor; pDoomed != NULL; pDoomed = pRest) {
		pRest = pDoomed->pNext;
		pDoomed = xfree(pDoomed);
	}

	/* Nothing is left, so every list pointer and flag starts afresh */
	pAnchor = NULL;
	pStyleLast = NULL;
	pMidPtr = NULL;
	bMoveMidPtr = FALSE;
	bInSequence = TRUE;
} /* end of vDestroyStyleInfoList */
/*
 * vConvertListCharacter - turn the list character into printable text
 *
 * ucNFC	the number format code of the list
 * usListChar	the list character as found in the style
 * szListChar	out: the result as a NUL-terminated string; expected to be
 *		an empty string on entry
 *
 * A printable ASCII character is used unchanged. For a number format other
 * than LIST_SPECIAL, LIST_SPECIAL2 or LIST_BULLETS the result is ".".
 * In all other cases the character is first mapped to a Unicode character;
 * that character is written as UTF-8 when the output encoding is UTF-8 and
 * is replaced by a one-character stand-in otherwise.
 */
static void
vConvertListCharacter(UCHAR ucNFC, USHORT usListChar, char *szListChar)
{
	options_type	tSettings;
	size_t	tUsed;

	fail(szListChar == NULL);
	fail(szListChar[0] != '\0');

	/* Printable ASCII needs no conversion at all */
	if (usListChar < 0x80 && isprint((int)usListChar)) {
		DBG_CHR_C(isalnum((int)usListChar), usListChar);
		szListChar[0] = (char)usListChar;
		szListChar[1] = '\0';
		return;
	}

	/* Only the special and bullet formats get a real symbol */
	if (!(ucNFC == LIST_SPECIAL ||
	      ucNFC == LIST_SPECIAL2 ||
	      ucNFC == LIST_BULLETS)) {
		szListChar[0] = '.';
		szListChar[1] = '\0';
		return;
	}

	/* Fetch the options only while one of the cached values is unset */
	if (eConversionType == conversion_unknown ||
	    eEncoding == encoding_neutral) {
		vGetOptions(&tSettings);
		eConversionType = tSettings.eConversionType;
		eEncoding = tSettings.eEncoding;
	}

	/* Step one: from the stored character to a Unicode character */
	switch (usListChar) {
	case 0x0000:
	case 0x00b7:
	case 0x00fe:
	case 0xf021:
	case 0xf043:
	case 0xf06c:
	case 0xf093:
	case 0xf0b7:
		/* BULLET */
		usListChar = 0x2022;
		break;
	case 0x0096:
	case 0xf02d:
		/* EN DASH */
		usListChar = 0x2013;
		break;
	case 0x00a8:
		/* BLACK DIAMOND SUIT */
		usListChar = 0x2666;
		break;
	case 0x00de:
		/* RIGHTWARDS DOUBLE ARROW */
		usListChar = 0x21d2;
		break;
	case 0x00e0:
	case 0xf074:
		/* LOZENGE */
		usListChar = 0x25ca;
		break;
	case 0x00e1:
		/* LEFT ANGLE BRACKET */
		usListChar = 0x2329;
		break;
	case 0xf020:
		/* SPACE */
		usListChar = 0x0020;
		break;
	case 0xf041:
		/* VICTORY HAND */
		usListChar = 0x270c;
		break;
	case 0xf066:
		/* GREEK PHI SYMBOL */
		usListChar = 0x03d5;
		break;
	case 0xf06e:
		/* BLACK SQUARE */
		usListChar = 0x25a0;
		break;
	case 0xf06f:
	case 0xf070:
	case 0xf0a8:
		/* WHITE SQUARE */
		usListChar = 0x25a1;
		break;
	case 0xf071:
		/* LOWER RIGHT SHADOWED WHITE SQUARE */
		usListChar = 0x2751;
		break;
	case 0xf075:
	case 0xf077:
		/* BLACK DIAMOND */
		usListChar = 0x25c6;
		break;
	case 0xf076:
		/* BLACK DIAMOND MINUS WHITE X */
		usListChar = 0x2756;
		break;
	case 0xf0a7:
		/* BLACK SMALL SQUARE */
		usListChar = 0x25aa;
		break;
	case 0xf0d8:
		/* RIGHTWARDS ARROWHEAD */
		usListChar = 0x27a2;
		break;
	case 0xf0e5:
		/* SOUTH WEST ARROW */
		usListChar = 0x2199;
		break;
	case 0xf0f0:
		/* RIGHTWARDS WHITE ARROW */
		usListChar = 0x21e8;
		break;
	case 0xf0fc:
		/* CHECK MARK */
		usListChar = 0x2713;
		break;
	default:
		if ((usListChar >= 0xe000 && usListChar < 0xf900) ||
		    (usListChar < 0x80 && !isprint((int)usListChar))) {
			/*
			 * Whatever is left of the private area and of the
			 * non-printable ASCII characters becomes the default
			 * character for this kind of list:
			 * LEFTWARDS ARROW for the special formats,
			 * BULLET for the rest
			 */
			DBG_HEX(usListChar);
			DBG_FIXME();
			usListChar = (ucNFC == LIST_SPECIAL ||
					ucNFC == LIST_SPECIAL2) ?
					0x2190 : 0x2022;
		}
		break;
	}

	/* Step two: from the Unicode character to the output encoding */
	if (eEncoding != encoding_utf_8) {
		switch (usListChar) {
		case 0x03d5:
		case 0x25a1:
		case 0x25c6:
		case 0x25ca:
		case 0x2751:
			szListChar[0] = 'o';
			break;
		case 0x2013:
		case 0x2500:
			szListChar[0] = '-';
			break;
		case 0x2190:
		case 0x2199:
		case 0x2329:
			szListChar[0] = '<';
			break;
		case 0x21d2:
			szListChar[0] = '=';
			break;
		case 0x21e8:
		case 0x27a2:
			szListChar[0] = '>';
			break;
		case 0x25a0:
		case 0x25aa:
			szListChar[0] = '.';
			break;
		case 0x2666:
			szListChar[0] = OUR_DIAMOND;
			break;
		case 0x270c:
			szListChar[0] = 'x';
			break;
		case 0x2713:
			szListChar[0] = 'V';
			break;
		case 0x2756:
			szListChar[0] = '*';
			break;
		case 0x2022:
		default:
			/* The bullet and everything without a stand-in */
			vGetBulletValue(eConversionType, eEncoding,
					szListChar, 2);
			break;
		}
		/* The stand-in is always exactly one character */
		tUsed = 1;
	} else {
		tUsed = tUcs2Utf8(usListChar, szListChar, 4);
	}
	szListChar[tUsed] = '\0';
} /* end of vConvertListCharacter */
/*
 * eGetNumType - translate a level number into a level type
 *
 * Level numbers 1 through 9 are outline levels, 10 is numbering, 11 is
 * sequence and 12 is pause. Every other number has no level type.
 *
 * Returns the level type
 */
level_type_enum
eGetNumType(UCHAR ucNumLevel)
{
	if (ucNumLevel >= 1 && ucNumLevel <= 9) {
		return level_type_outline;
	}
	if (ucNumLevel == 10) {
		return level_type_numbering;
	}
	if (ucNumLevel == 11) {
		return level_type_sequence;
	}
	if (ucNumLevel == 12) {
		return level_type_pause;
	}
	return level_type_none;
} /* end of eGetNumType */
/*
 * vCorrectStyleValues - bring style values within the range Antiword uses
 *
 * pStyleBlock	the style block to correct; it is changed in place
 *
 * The space before and after is limited, styles with an istd from 1
 * through 9 (the heading levels) get at least HEADING_GAP on both sides,
 * a negative left indent and a positive right indent become zero, and the
 * list character is converted into szListChar.
 */
void
vCorrectStyleValues(style_block_type *pStyleBlock)
{
	int	iIsHeading;

	iIsHeading = pStyleBlock->usIstd >= 1 && pStyleBlock->usIstd <= 9;

	/* The space before */
	if (pStyleBlock->usBeforeIndent > 2160) {
		if (pStyleBlock->usBeforeIndent > 0x7fff) {
			pStyleBlock->usBeforeIndent = 0;
		} else {
			/* Cut off at 2160 twips = 1.5 inches or 38.1 mm */
			DBG_DEC(pStyleBlock->usBeforeIndent);
			pStyleBlock->usBeforeIndent = 2160;
		}
	}
	if (iIsHeading && pStyleBlock->usBeforeIndent < HEADING_GAP) {
		NO_DBG_DEC(pStyleBlock->usBeforeIndent);
		pStyleBlock->usBeforeIndent = HEADING_GAP;
	}

	/* The space after, by the same rules */
	if (pStyleBlock->usAfterIndent > 2160) {
		if (pStyleBlock->usAfterIndent > 0x7fff) {
			pStyleBlock->usAfterIndent = 0;
		} else {
			/* Cut off at 2160 twips = 1.5 inches or 38.1 mm */
			DBG_DEC(pStyleBlock->usAfterIndent);
			pStyleBlock->usAfterIndent = 2160;
		}
	}
	if (iIsHeading && pStyleBlock->usAfterIndent < HEADING_GAP) {
		NO_DBG_DEC(pStyleBlock->usAfterIndent);
		pStyleBlock->usAfterIndent = HEADING_GAP;
	}

	/* Indents on the wrong side of zero are dropped */
	if (pStyleBlock->sLeftIndent < 0) {
		pStyleBlock->sLeftIndent = 0;
	}
	if (pStyleBlock->sRightIndent > 0) {
		pStyleBlock->sRightIndent = 0;
	}

	vConvertListCharacter(pStyleBlock->ucNFC,
			pStyleBlock->usListChar,
			pStyleBlock->szListChar);
} /* end of vCorrectStyleValues */
/*
 * vAdd2StyleInfoList - append a style block to the Style Information List
 *
 * pStyleBlock	the style block to add; it is copied, so the caller keeps
 *		ownership of the original
 *
 * A block with file offset FC_INVALID is ignored. A block with the same
 * file offset as the last record replaces the contents of that record.
 * The copy in the list is passed through vCorrectStyleValues.
 */
void
vAdd2StyleInfoList(const style_block_type *pStyleBlock)
{
	style_mem_type	*pFresh;

	fail(pStyleBlock == NULL);

	NO_DBG_MSG("bAdd2StyleInfoList");

	if (pStyleBlock->ulFileOffset == FC_INVALID) {
		NO_DBG_DEC(pStyleBlock->usIstd);
		return;
	}

	NO_DBG_HEX(pStyleBlock->ulFileOffset);
	NO_DBG_DEC_C(pStyleBlock->sLeftIndent != 0,
					pStyleBlock->sLeftIndent);
	NO_DBG_DEC_C(pStyleBlock->sRightIndent != 0,
					pStyleBlock->sRightIndent);
	NO_DBG_DEC_C(pStyleBlock->bNumPause, pStyleBlock->bNumPause);
	NO_DBG_DEC_C(pStyleBlock->usIstd != 0, pStyleBlock->usIstd);
	NO_DBG_DEC_C(pStyleBlock->usStartAt != 1, pStyleBlock->usStartAt);
	NO_DBG_DEC_C(pStyleBlock->usAfterIndent != 0,
					pStyleBlock->usAfterIndent);
	NO_DBG_DEC_C(pStyleBlock->ucAlignment != 0, pStyleBlock->ucAlignment);
	NO_DBG_DEC(pStyleBlock->ucNFC);
	NO_DBG_HEX(pStyleBlock->usListChar);

	if (pStyleLast != NULL &&
	    pStyleLast->tInfo.ulFileOffset == pStyleBlock->ulFileOffset) {
		/*
		 * Two styles in a row for one and the same offset:
		 * the later one wins and overwrites the last record
		 */
		fail(pStyleLast->pNext != NULL);
		pStyleLast->tInfo = *pStyleBlock;
		vCorrectStyleValues(&pStyleLast->tInfo);
		return;
	}

	/* Build the new record: copy, sequence number, then corrections */
	pFresh = xmalloc(sizeof(*pFresh));
	pFresh->tInfo = *pStyleBlock;
	pFresh->pNext = NULL;
	pFresh->ulSequenceNumber =
			ulGetSeqNumber(pFresh->tInfo.ulFileOffset);
	vCorrectStyleValues(&pFresh->tInfo);

	if (pAnchor != NULL) {
		/* Link the record behind the current last one */
		fail(pStyleLast == NULL);
		pStyleLast->pNext = pFresh;
		/* The mid pointer advances on every second append */
		if (bMoveMidPtr) {
			pMidPtr = pMidPtr->pNext;
		}
		bMoveMidPtr = bMoveMidPtr ? FALSE : TRUE;
		/* One record out of order ends the sequence for good */
		bInSequence = bInSequence &&
			(pFresh->ulSequenceNumber >
					pStyleLast->ulSequenceNumber);
	} else {
		/* The very first record is anchor and mid pointer at once */
		pAnchor = pFresh;
		pMidPtr = pAnchor;
		bMoveMidPtr = FALSE;
		bInSequence = TRUE;
	}
	pStyleLast = pFresh;
} /* end of vAdd2StyleInfoList */
/*
 * pGetNextStyleInfoListItem - get the record that follows the given record
 *
 * pCurr	a record obtained earlier from this list, or NULL to ask
 *		for the first record
 *
 * Returns the next record, or NULL when there is none
 */
const style_block_type *
pGetNextStyleInfoListItem(const style_block_type *pCurr)
{
	const style_mem_type	*pRecord;

	if (pCurr == NULL) {
		/* No predecessor given: start at the anchor, if there is one */
		return pAnchor == NULL ? NULL : &pAnchor->tInfo;
	}

	/*
	 * Step back from the embedded style block to the list record that
	 * contains it. The detour via char and void pointers prevents
	 * alignment warnings.
	 */
	pRecord = (const style_mem_type *)(const void *)
		((const char *)pCurr - offsetof(style_mem_type, tInfo));
	fail(pCurr != &pRecord->tInfo);

	/* The last record has no successor */
	return pRecord->pNext == NULL ? NULL : &pRecord->pNext->tInfo;
} /* end of pGetNextStyleInfoListItem */
/*
 * pGetNextTextStyle - get the next style that belongs to text
 *
 * pCurr	the record to start after, or NULL to start at the beginning
 *
 * Records of the header/footer list, the macro list and the annotation
 * list are skipped.
 *
 * Returns the next text style, or NULL when there is none
 */
const style_block_type *
pGetNextTextStyle(const style_block_type *pCurr)
{
	const style_block_type	*pCandidate;

	for (pCandidate = pGetNextStyleInfoListItem(pCurr);
	     pCandidate != NULL;
	     pCandidate = pGetNextStyleInfoListItem(pCandidate)) {
		if (pCandidate->eListID != hdrftr_list &&
		    pCandidate->eListID != macro_list &&
		    pCandidate->eListID != annotation_list) {
			return pCandidate;
		}
	}
	return NULL;
} /* end of pGetNextTextStyle */
/*
 * usGetIstd - get the istd that belongs to the given file offset
 *
 * ulFileOffset	the file offset to look up
 *
 * The answer comes from the record with the largest valid sequence number
 * that does not exceed the sequence number of the given offset.
 *
 * Returns the istd of that record, or ISTD_NORMAL when the offset has no
 * sequence number or no record qualifies
 */
USHORT
usGetIstd(ULONG ulFileOffset)
{
	const style_mem_type	*pScan, *pBest;
	ULONG	ulSeq, ulBest;

	ulSeq = ulGetSeqNumber(ulFileOffset);
	if (ulSeq == FC_INVALID) {
		return ISTD_NORMAL;
	}
	NO_DBG_HEX(ulFileOffset);
	NO_DBG_DEC(ulSeq);

	/*
	 * In an ordered list the first half can be skipped when the wanted
	 * sequence number lies beyond the record at the mid pointer
	 */
	pScan = pAnchor;
	if (bInSequence &&
	    pMidPtr != NULL &&
	    ulSeq > pMidPtr->ulSequenceNumber) {
		pScan = pMidPtr;
	}

	pBest = NULL;
	ulBest = 0;
	while (pScan != NULL) {
		if (pScan->ulSequenceNumber > ulSeq) {
			/* Too far; in an ordered list nothing better follows */
			if (bInSequence) {
				break;
			}
		} else if (pScan->ulSequenceNumber != FC_INVALID &&
			   (pBest == NULL ||
			    pScan->ulSequenceNumber > ulBest)) {
			/* The closest record at or before the target so far */
			pBest = pScan;
			ulBest = pScan->ulSequenceNumber;
		}
		pScan = pScan->pNext;
	}
	NO_DBG_DEC(ulBest);

	if (pBest == NULL) {
		return ISTD_NORMAL;
	}

	NO_DBG_DEC(pBest->tInfo.usIstd);
	return pBest->tInfo.usIstd;
} /* end of usGetIstd */
/*
 * bStyleImpliesList - does the style information imply being part of a list
 *
 * pStyle	the style information of the paragraph
 * iWordVersion	the version of Word that wrote the document
 *
 * Heading levels (istd 1 through 9) are never part of a list. Below Word
 * version 8 the number level decides (old style lists), from version 8 on
 * the list index decides (new style lists).
 *
 * Returns TRUE when the paragraph is part of a list, otherwise FALSE
 */
BOOL
bStyleImpliesList(const style_block_type *pStyle, int iWordVersion)
{
	fail(pStyle == NULL);
	fail(iWordVersion < 0);

	if (pStyle->usIstd >= 1 && pStyle->usIstd <= 9) {
		/* A heading level */
		return FALSE;
	}
	return iWordVersion < 8 ?
		pStyle->ucNumLevel != 0 :	/* old style lists */
		pStyle->usListIndex != 0;	/* new style lists */
} /* end of bStyleImpliesList */
|