Plan 9 from Bell Labs’s /usr/web/sources/plan9/sys/src/cmd/aux/antiword/wordwin.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


/*
 * wordwin.c
 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the WIN internals of a MS Word file
 */

#include "antiword.h"


/*
 * bGetDocumentText - make a list of the text blocks of a Word document
 *
 * pFile	the open Word file; passed on to vSplitBlockList
 * aucHeader	the header block of the file
 *
 * The status word at header offset 0x0a is inspected first: fast saved
 * and encrypted documents are rejected with an error message. Otherwise
 * one text block is registered that starts at the offset stored at 0x18
 * and whose length is the sum of the five lengths stored at 0x34 .. 0x44
 * (text, footnote, header/footer, macro and annotation lengths).
 *
 * Return TRUE when successful, otherwise FALSE
 */
static BOOL
bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
{
	text_block_type	tTextBlock;
	ULONG	ulBeginOfText;
	ULONG	ulTextLen, ulFootnoteLen;
	ULONG	ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
	UINT	uiQuickSaves;
	USHORT	usDocStatus;
	BOOL	bTemplate, bFastSaved, bEncrypted, bSuccess;

	fail(pFile == NULL);
	fail(aucHeader == NULL);

	DBG_MSG("bGetDocumentText");

	/* Get the status flags from the header */
	usDocStatus = usGetWord(0x0a, aucHeader);
	DBG_HEX(usDocStatus);
	bTemplate = (usDocStatus & BIT(0)) != 0;
	DBG_MSG_C(bTemplate, "This document is a Template");
	bFastSaved = (usDocStatus & BIT(2)) != 0;
	uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
	DBG_DEC_C(bFastSaved, uiQuickSaves);
	if (bFastSaved) {
		werr(0, "Word2: fast saved documents are not supported yet");
		return FALSE;
	}
	bEncrypted = (usDocStatus & BIT(8)) != 0;
	if (bEncrypted) {
		werr(0, "Encrypted documents are not supported");
		return FALSE;
	}

	/* Get length information */
	ulBeginOfText = ulGetLong(0x18, aucHeader);
	DBG_HEX(ulBeginOfText);
	ulTextLen = ulGetLong(0x34, aucHeader);
	ulFootnoteLen = ulGetLong(0x38, aucHeader);
	ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
	ulMacroLen = ulGetLong(0x40, aucHeader);
	ulAnnotationLen = ulGetLong(0x44, aucHeader);
	DBG_DEC(ulTextLen);
	DBG_DEC(ulFootnoteLen);
	DBG_DEC(ulHdrFtrLen);
	DBG_DEC(ulMacroLen);
	DBG_DEC(ulAnnotationLen);

	/*
	 * Fast saved documents were rejected above, so from here on the
	 * whole text is described by one single block.
	 */
	tTextBlock.ulFileOffset = ulBeginOfText;
	tTextBlock.ulCharPos = ulBeginOfText;
	tTextBlock.ulLength = ulTextLen +
			ulFootnoteLen +
			ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
	tTextBlock.bUsesUnicode = FALSE;
	tTextBlock.usPropMod = IGNORE_PROPMOD;
	bSuccess = bAdd2TextBlockList(&tTextBlock);
	/* Only dump the block when it could not be added */
	DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
	DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
	DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
	DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
	DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);

	if (bSuccess) {
		vSplitBlockList(pFile,
				ulTextLen,
				ulFootnoteLen,
				ulHdrFtrLen,
				ulMacroLen,
				ulAnnotationLen,
				0,
				0,
				0,
				FALSE);
	} else {
		vDestroyTextBlockList();
		werr(0, "I can't find the text of this document");
	}
	return bSuccess;
} /* end of bGetDocumentText */

/*
 * vGetDocumentData - make a list of the data blocks of a Word document
 *
 * pFile	the open Word file (not used by this function)
 * aucHeader	the header block of the file
 *
 * Data blocks are only collected when bit 3 of the status word is set
 * (bHasImages) and the selected conversion type and image level call for
 * images. In that case the region between the offsets stored at 0x1c and
 * 0xa0 of the header is registered as one data block.
 */
static void
vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
{
	data_block_type	tDataBlock;
	options_type	tOptions;
	ULONG	ulEndOfText, ulBeginCharInfo;
	BOOL	bFastSaved, bHasImages, bWantData, bSuccess;
	USHORT	usDocStatus;

	/* Get the options */
	vGetOptions(&tOptions);

	/* Pick the relevant status flags out of the header */
	usDocStatus = usGetWord(0x0a, aucHeader);
	DBG_HEX(usDocStatus);
	bHasImages = (usDocStatus & BIT(3)) != 0;
	bFastSaved = (usDocStatus & BIT(2)) != 0;

	/*
	 * Data blocks are needed only if the document has images, the
	 * output is not text-only and images have not been switched off
	 */
	bWantData = bHasImages &&
		tOptions.eConversionType != conversion_text &&
		tOptions.eConversionType != conversion_fmt_text &&
		tOptions.eConversionType != conversion_xml &&
		tOptions.eImageLevel != level_no_images;
	if (!bWantData) {
		vDestroyDataBlockList();
		return;
	}

	/* Fast saved documents are never handled here */
	bSuccess = FALSE;
	if (!bFastSaved) {
		/* This datablock is too big, but it contains all images */
		ulEndOfText = ulGetLong(0x1c, aucHeader);
		DBG_HEX(ulEndOfText);
		ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
		DBG_HEX(ulBeginCharInfo);
		if (ulBeginCharInfo <= ulEndOfText) {
			/* Nothing to add; only an empty region is acceptable */
			bSuccess = ulBeginCharInfo == ulEndOfText;
		} else {
			tDataBlock.ulFileOffset = ulEndOfText;
			tDataBlock.ulDataPos = ulEndOfText;
			tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
			bSuccess = bAdd2DataBlockList(&tDataBlock);
			DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
			DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
			DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
		}
	}

	if (bSuccess) {
		return;
	}
	vDestroyDataBlockList();
	werr(0, "I can't find the data of this document");
} /* end of vGetDocumentData */

/*
 * iInitDocumentWIN - initialize a WIN document
 *
 * pFile	the open Word file
 * lFilesize	the size of that file in bytes
 *
 * Reads the header block from the start of the file and determines the
 * Word version. When the text of the document can be located, the data
 * blocks, the property information, the default tab width and the notes
 * information are collected as well.
 *
 * Returns the version of Word that made the document (1 or 2) or -1
 */
int
iInitDocumentWIN(FILE *pFile, long lFilesize)
{
	int	iWordVersion;
	BOOL	bSuccess;
	USHORT	usIdent;
	UCHAR	aucHeader[384];

	fail(pFile == NULL);

	/*
	 * A file that cannot hold a complete header block is rejected.
	 * The cast keeps the comparison signed, so that a negative file
	 * size is rejected too.
	 */
	if (lFilesize < (long)sizeof(aucHeader)) {
		return -1;
	}

	/* Read the headerblock */
	if (!bReadBytes(aucHeader, sizeof(aucHeader), 0x00, pFile)) {
		return -1;
	}
	/* Get the "magic number" from the header */
	usIdent = usGetWord(0x00, aucHeader);
	DBG_HEX(usIdent);
	fail(usIdent != 0xa59b &&	/* WinWord 1.x */
		usIdent != 0xa5db);	/* WinWord 2.0 */
	iWordVersion = iGetVersionNumber(aucHeader);
	if (iWordVersion != 1 && iWordVersion != 2) {
		werr(0, "This file is not from 'Win Word 1 or 2'.");
		return -1;
	}
	bSuccess = bGetDocumentText(pFile, aucHeader);
	if (bSuccess) {
		/* The remaining information is only useful when there is text */
		vGetDocumentData(pFile, aucHeader);
		vGetPropertyInfo(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
		vSetDefaultTabWidth(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
		vGetNotesInfo(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
	}
	return bSuccess ? iWordVersion : -1;
} /* end of iInitDocumentWIN */

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].