Plan 9 from Bell Labs’s /usr/web/sources/patch/sorry/libhtml-win-koi/html.h

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#pragma lib "libhtml.a"
#pragma src "/sys/src/libhtml"

// UTILS
// Conversions between byte buffers and Rune strings.
// chset is presumably one of the charset codes declared in this header
// (US_Ascii, UTF_8, ...) -- TODO confirm against the implementation.
// NOTE(review): ownership of the returned buffers is not visible from here.
extern uchar*	fromStr(Rune* buf, int n, int chset);
extern Rune*	toStr(uchar* buf, int n, int chset);

// Common LEX and BUILD enums

// Media types
enum
{
	// Appl* types
	ApplMsword, ApplOctets, ApplPdf, ApplPostscript,
	ApplRtf, ApplFramemaker, ApplMsexcel, ApplMspowerpoint,

	// values are positional, so UnknownType must stay between the groups
	UnknownType,

	// Audio* types
	Audio32kadpcm, AudioBasic,

	// Image* types
	ImageCgm, ImageG3fax, ImageGif, ImageIef, ImageJpeg, ImagePng,
	ImageTiff, ImageXBit, ImageXBit2, ImageXBitmulti, ImageXXBitmap,

	// Model* and Multi* types
	ModelVrml,
	MultiDigest, MultiMixed,

	// Text* types
	TextCss, TextEnriched, TextHtml, TextJavascript, TextPlain,
	TextRichtext, TextSgml, TextTabSeparatedValues, TextXml,

	// Video* types
	VideoMpeg, VideoQuicktime,

	NMEDIATYPES	// number of media type codes above
};

// HTTP methods
enum { HGet = 0, HPost };	// stored in Form.method

// Charsets
enum
{
	UnknownCharset = 0,
	US_Ascii, ISO_8859_1, UTF_8, Unicode,
	WIN_1251, KOI8,
	NCHARSETS	// number of charset codes above
};

// Frame Target IDs
// Predefined target ids, numbered from 0.
// NOTE(review): presumably the _top, _self, _parent and _blank targets -- confirm.
enum { FTtop = 0, FTself, FTparent, FTblank };

// LEX
// Lexer types; neither structure is laid out in this header.
typedef struct Token Token;
typedef struct Attr Attr;

// Token is deliberately left incomplete here (Plan 9 compiler pragma).
#pragma incomplete Token

// BUILD

// Forward typedefs, so the structures below can refer to one another
// by bare name regardless of definition order.
// NOTE(review): Stack, Pstate and ItemSource are named here but are not
// defined in the visible part of this header.
typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;	// defined in Layout module

// Lay is only ever used through pointers in this header.
#pragma incomplete Lay


// Alignment types
enum {
	ALnone = 0,	// no alignment given
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};

// A horizontal/vertical alignment pair; each field holds one AL* code.
struct Align
{
	uchar	halign;	// one of ALnone, ALleft, etc.
	uchar	valign;	// one of ALnone, ALtop, etc.
};

// A Dimen holds a dimension specification, especially for those
// cases when a number can be followed by a % or a * to indicate
// percentage of total or relative weight.
// Dnone means no dimension was specified

// To fit in a word, use top bits to identify kind, rest for value
enum {
	// the kind lives in bits 29-30
	Dnone = 0,
	Dpixels = 1<<29,
	Dpercent = 2<<29,
	Drelative = 3<<29,

	// masks separating the kind from the value
	Dkindmask = 3<<29,
	Dspecmask = ~Dkindmask
};

// One dimension packed into a single int: a D* kind in the bits
// covered by Dkindmask, and the value in the bits covered by Dspecmask.
// NOTE(review): presumably decoded with dimenkind()/dimenspec() -- confirm.
struct Dimen
{
	int	kindspec;		// kind | spec
};

// Background is either an image or a color.
// If both are set, the image has precedence.
struct Background
{
	Rune*	image;	// url
	int		color;	// background color; NOTE(review): encoding not stated in this header
};


// There are about a half dozen Item variants.
// They all look like this at the start (using Plan 9 C's
// anonymous structure member mechanism),
// and then the tag field dictates what extra fields there are.
struct Item
{
	Item*	next;		// successor in list of items
	int		width;	// width in pixels (0 for floating items)
	int		height;	// height in pixels
	Rectangle r;		// NOTE(review): not documented here; presumably filled in by layout -- confirm
	int		ascent;	// ascent (from top to baseline) in pixels
	int		anchorid;	// if nonzero, which anchor we're in
	int		state;	// flags and values (see below)
	Genattr*	genattr;	// generic attributes and events
	int		tag;		// variant discriminator: Itexttag, etc.
};

// Item variant tags
// One tag per Item variant structure, in the order the variants are defined.
enum {
	Itexttag = 0,	// Itext
	Iruletag,		// Irule
	Iimagetag,		// Iimage
	Iformfieldtag,	// Iformfield
	Itabletag,		// Itable
	Ifloattag,		// Ifloat
	Ispacertag		// Ispacer
};

// Text item: a string drawn in one font and color, with an optional
// vertical offset and underline/strike line.
struct Itext
{
	Item;				// (with tag ==Itexttag)
	Rune*	s;			// the characters
	int		fnt;			// style*NumSize+size (see font stuff, below)
	int		fg;			// Pixel (color) for text
	uchar	voff;			// Voffbias+vertical offset from baseline, in pixels (+ve == down)
	uchar	ul;			// ULnone, ULunder, or ULmid
};

// Rule item (NOTE(review): presumably built from <hr> -- confirm).
struct Irule
{
	Item;				// (with tag ==Iruletag)
	uchar	align;		// alignment spec
	uchar	noshade;		// if true, don't shade
	int		size;			// size attr (rule height)
	Dimen	wspec;		// width spec
};


// Image item. All of a document's image items are also chained
// together through nextimage.
struct Iimage
{
	Item;				// (with tag ==Iimagetag)
	Rune*	imsrc;		// image src url
	int		imwidth;		// spec width (actual, if no spec)
	int		imheight;		// spec height (actual, if no spec)
	Rune*	altrep;		// alternate representation, in absence of image
	Map*	map;			// if non-nil, client side map
	int		ctlid;			// if animated
	uchar	align;		// vertical alignment
	uchar	hspace;		// in pixels; buffer space on each side
	uchar	vspace;		// in pixels; buffer space on top and bottom
	uchar	border;		// in pixels: border width to draw around image
	Iimage*	nextimage;	// next in list of document's images
	void*	aux;			// NOTE(review): opaque pointer; its user is not visible in this header
};


// Item that places a form field in the item list.
struct Iformfield
{
	Item;				// (with tag ==Iformfieldtag)
	Formfield*	formfield;	// the field's description
};


// Item that places a table in the item list.
struct Itable
{
	Item;				// (with tag ==Itabletag)
	Table*	table;		// the table's description
};


// Floating item: wraps a table or image item that floats to a margin.
struct Ifloat
{
	Item;				// (with tag ==Ifloattag)
	Item*	item;			// table or image item that floats
	int		x;			// x coord of top (from right, if ALright)
	int		y;			// y coord of top
	uchar	side;			// margin it floats to: ALleft or ALright
	uchar	infloats;		// true if this has been added to a lay.floats
	Ifloat*	nextfloat;		// in list of floats
};


// Spacer item; spkind selects one of the ISP* spacer kinds.
struct Ispacer
{
	Item;				// (with tag ==Ispacertag)
	int		spkind;		// ISPnull, etc.
};

// Item state flags and value fields
// Layout of the state word, as implied by the constants:
//	bits 22-31	single-bit flags (IFbrk down to IFsmap)
//	bits 8-15	indent field (IFindentmask, shifted by IFindentshift)
//	bits 0-7	hang field (IFhangmask)
// Bits 16-21 are not used by any constant here.
// NOTE(review): 0x80000000 does not fit in a 32-bit signed int, and ISO C
// requires enumerators to be representable as int; this presumably relies
// on the Plan 9 compiler -- confirm before porting.
enum {
	IFbrk =			0x80000000,	// forced break before this item
	IFbrksp =			0x40000000,	// add 1 line space to break (IFbrk set too)
	IFnobrk =			0x20000000,	// break not allowed before this item
	IFcleft =			0x10000000,	// clear left floats (IFbrk set too)
	IFcright =			0x08000000,	// clear right floats (IFbrk set too)
	IFwrap =			0x04000000,	// in a wrapping (non-pre) line
	IFhang =			0x02000000,	// in a hanging (into left indent) item
	IFrjust =			0x01000000,	// right justify current line
	IFcjust =			0x00800000,	// center justify current line
	IFsmap =			0x00400000,	// image is server-side map
	IFindentshift =		8,
	IFindentmask =		(255<<IFindentshift),	// current indent, in tab stops
	IFhangmask =		255			// current hang into left indent, in 1/10th tabstops
};

// Bias added to Itext's voff field
enum {
	Voffbias = 128	// lets the unsigned voff byte carry offsets in either direction
};

// Spacer kinds
enum {
	ISPnull = 0,	// zero width, zero height
	ISPvline,		// as tall as the current font (height and ascent)
	ISPhspace,		// as wide as a space in the current font
	ISPgeneral		// anything else (e.g., between markers and list)
};

// Generic attributes and events (not many elements will have any of these set)
// NOTE(review): id, class, style and title presumably hold the values of
// the HTML attributes of the same names -- confirm.
struct Genattr
{
	Rune*	id;
	Rune*	class;
	Rune*	style;
	Rune*	title;
	SEvent*	events;	// list of scripted events, linked through SEvent.next
};

// One scripted event, kept on a singly linked list.
struct SEvent
{
	SEvent*	next;		// in list of events
	int		type;		// SEonblur, etc.
	Rune*	script;	// NOTE(review): presumably the script text for this event -- confirm
};

// Values for SEvent.type, numbered from 0.
enum {
	SEonblur = 0,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,
	Numscriptev	// number of event types above
};

// Form field types
// Values for Formfield.ftype, numbered from 0.
enum {
	Ftext = 0, Fpassword, Fcheckbox, Fradio,
	Fsubmit, Fhidden, Fimage, Freset,
	Ffile, Fbutton, Fselect, Ftextarea
};

// Information about a field in a form
// One field of a form; the fields of a form are chained through next.
struct Formfield
{
	Formfield*	next;		// in list of fields for a form
	int			ftype;	// Ftext, Fpassword, etc.
	int			fieldid;	// serial no. of field within its form
	Form*		form;	// containing form
	Rune*		name;	// name attr
	Rune*		value;	// value attr
	int			size;		// size attr
	int			maxlength;	// maxlength attr
	int			rows;	// rows attr
	int			cols;		// cols attr
	uchar		flags;	// FFchecked, etc.
	Option*		options;	// for Fselect fields
	Item*		image;	// image item, for Fimage fields
	int			ctlid;		// identifies control for this field in layout
	SEvent*		events;	// same as genattr->events of containing item
	void*		aux;		// NOTE(review): opaque pointer; its user is not visible in this header
};

// Bits for Formfield.flags (a uchar, so only bits 0-7 are available).
enum {
	FFchecked = 1<<7,
	FFmultiple = 1<<6
};

// Option holds info about an option in a "select" form field
// Options of one select field are chained through next.
struct Option
{
	Option*	next;			// next in list of options for a field
	int		selected;		// true if selected initially
	Rune*	value;		// value attr
	Rune*	display;		// display string
};

// Form holds info about a form
// The forms of a document are chained through next; each form owns
// the list of its fields.
struct Form
{
	Form*		next;		// in list of forms for document
	int			formid;	// serial no. of form within its doc
	Rune*		name;	// name or id attr (netscape uses name, HTML 4.0 uses id)
	Rune*		action;	// action attr
	int			target;	// target attr as targetid
	int			method;	// HGet or HPost
	int			nfields;	// number of fields
	Formfield*	fields;	// form's fields, in input order
};

// Flags used in various table structures
// Bits for the uchar flags members of the table structures.
enum {
	TFparsing = 1<<7,
	TFnowrap = 1<<6,
	TFisth = 1<<5
};


// Information about a table
// Parsed description of one table, plus the sizes computed for it.
// The tables of a document are chained through next.
struct Table
{
	Table*		next;			// next in list of document's tables
	int			tableid;		// serial no. of table within its doc
	Tablerow*	rows;		// array of row specs (list during parsing)
	int			nrow;		// total number of rows
	Tablecol*		cols;			// array of column specs
	int			ncol;			// total number of columns
	Tablecell*		cells;			// list of unique cells
	int			ncell;		// total number of cells
	Tablecell***	grid;			// 2-D array of cells
	Align		align;		// alignment spec for whole table
	Dimen		width;		// width spec for whole table
	int			border;		// border attr
	int			cellspacing;	// cellspacing attr
	int			cellpadding;	// cellpadding attr
	Background	background;	// table background
	Item*		caption;		// linked list of Items, giving caption
	uchar		caption_place;	// ALtop or ALbottom
	Lay*			caption_lay;	// layout of caption
	int			totw;			// total width
	int			toth;			// total height
	int			caph;		// caption height
	int			availw;		// used for previous 3 sizes
	Token*		tabletok;		// token that started the table
	uchar		flags;		// Lchanged, perhaps (NOTE(review): Lchanged is not defined in this header)
};


// Per-column information; Table.cols is an array of these.
// NOTE(review): units and coordinate system of width/pos are not stated here.
struct Tablecol
{
	int		width;
	Align	align;
	Point		pos;
};


// Per-row information; Table.rows holds these (as a list while parsing,
// as an array afterwards, according to the comment on Table.rows).
struct Tablerow
{
	Tablerow*	next;			// Next in list of rows, during parsing
	Tablecell*		cells;			// Cells in row, linked through nextinrow
	int			height;
	int			ascent;
	Align		align;
	Background	background;
	Point			pos;
	uchar		flags;		// 0 or TFparsing
};


// A Tablecell is one cell of a table.
// It may span multiple rows and multiple columns.
// Cells are linked on two lists: the list for all the cells of
// a table (the next pointers), and the list of all the
// cells that start in a given row (the nextinrow pointers)
struct Tablecell
{
	Tablecell*		next;			// next in list of table's cells
	Tablecell*		nextinrow;	// next in list of row's cells
	int			cellid;		// serial no. of cell within table
	Item*		content;		// contents before layout
	Lay*			lay;			// layout of cell
	int			rowspan;		// number of rows spanned by this cell
	int			colspan;		// number of cols spanned by this cell
	Align		align;		// alignment spec
	uchar		flags;		// TFparsing, TFnowrap, TFisth
	Dimen		wspec;		// suggested width
	int			hspec;		// suggested height
	Background	background;	// cell background
	int			minw;		// minimum possible width
	int			maxw;		// maximum width
	int			ascent;		// cell's ascent
	int			row;			// row of upper left corner
	int			col;			// col of upper left corner
	Point			pos;			// nw corner of cell contents, in cell
	Rectangle		r;			// NOTE(review): not documented here; presumably the cell's rectangle -- confirm
};

// Anchor is for info about hyperlinks that go somewhere
// NOTE(review): index is presumably the value stored in Item.anchorid -- confirm.
struct Anchor
{
	Anchor*		next;		// next in list of document's anchors
	int			index;	// serial no. of anchor within its doc
	Rune*		name;	// name attr
	Rune*		href;		// href attr
	int			target;	// target attr as targetid
};


// DestAnchor is for info about hyperlinks that are destinations
// Destination anchors are chained through next; each one points at
// the item it marks.
struct DestAnchor
{
	DestAnchor*	next;		// next in list of document's destanchors
	int			index;	// serial no. of anchor within its doc
	Rune*		name;	// name attr
	Item*		item;		// the destination
};


// Maps (client side)
// A named client-side map: a list of areas, chained through Area.next.
struct Map
{
	Map*	next;			// next in list of document's maps
	Rune*	name;		// map name
	Area*	areas;		// list of map areas
};


// One area of a client-side map: a shape, its coordinates, and the
// link it leads to.
struct Area
{
	Area*		next;		// next in list of a map's areas
	int			shape;	// SHrect, etc.
	Rune*		href;		// associated hypertext link
	int			target;	// associated target frame
	Dimen*		coords;	// array of coords for shape
	int			ncoords;	// size of coords array
};

// Area shapes
// Values for Area.shape, numbered from 0.
enum {
	SHrect = 0,
	SHcircle,
	SHpoly
};

// Fonts are represented by integers: style*NumSize + size

// Font styles
enum {
	FntR = 0,		// roman
	FntI,			// italic
	FntB,			// bold
	FntT,			// typewriter
	NumStyle		// number of styles above
};

// Font sizes
enum {
	Tiny = 0, Small, Normal, Large, Verylarge,
	NumSize	// number of sizes above
};

enum {
	NumFnt = NumStyle*NumSize,		// number of distinct font codes
	DefFnt = FntR*NumSize + Normal	// font code for roman at Normal size
};

// Lines are needed through some text items, for underlining or strikethrough
// Values for Itext.ul, numbered from 0.
enum {
	ULnone = 0,
	ULunder,
	ULmid
};

// Kidinfo flags
// One bit each, bits 0 through 5.
enum {
	FRnoresize = 1<<0,
	FRnoscroll = 1<<1,
	FRhscroll = 1<<2,
	FRvscroll = 1<<3,
	FRhscrollauto = 1<<4,
	FRvscrollauto = 1<<5
};

// Information about child frame or frameset
// One structure serves both cases: isframeset tells whether the
// "frame" group or the "frameset" group of fields applies.
struct Kidinfo
{
	Kidinfo*		next;		// in list of kidinfos for a frameset
	int			isframeset;

	// fields for "frame"
	Rune*		src;		// only nil if a "dummy" frame or this is frameset
	Rune*		name;	// always non-empty if this isn't frameset
	int			marginw;
	int			marginh;
	int			framebd;
	int			flags;	// FRnoresize, etc.

	// fields for "frameset"
	Dimen*		rows;	// array of row dimensions
	int			nrows;	// length of rows
	Dimen*		cols;		// array of col dimensions
	int			ncols;	// length of cols
	Kidinfo*		kidinfos;	// NOTE(review): presumably the children, linked through next -- confirm
	Kidinfo*		nextframeset;	// parsing stack
};


// Document info (global information about HTML page)
// Per-document record: header/body settings first, then the heads of
// the per-document lists (anchors, forms, tables, maps, images).
// parsehtml() takes a Docinfo** output parameter.
struct Docinfo
{
	// stuff from HTTP headers, doc head, and body tag
	Rune*		src;				// original source of doc
	Rune*		base;			// base URL of doc
	Rune*		doctitle;			// from <title> element
	Background	background;		// background specification
	Iimage*		backgrounditem;	// Image Item for doc background image, or nil
	int			text;				// doc foreground (text) color
	int			link;				// unvisited hyperlink color
	int			vlink;			// visited hyperlink color
	int			alink;			// highlighting hyperlink color
	int			target;			// target frame default
	int			chset;			// ISO_8859_1, etc.
	int			mediatype;		// TextHtml, etc.
	int			scripttype;		// TextJavascript, etc.
	int			hasscripts;		// true if scripts used
	Rune*		refresh;			// content of <http-equiv=Refresh ...>
	Kidinfo*		kidinfo;			// if a frameset
	int			frameid;			// id of document frame

	// info needed to respond to user actions
	Anchor*		anchors;			// list of href anchors
	DestAnchor*	dests;			// list of destination anchors
	Form*		forms;			// list of forms
	Table*		tables;			// list of tables
	Map*		maps;			// list of maps
	Iimage*		images;			// list of image items (through nextimage links)
};

// Library entry points.
// NOTE(review): the per-line notes below are inferred from names and
// signatures only; confirm against the implementation before relying on them.
extern int			dimenkind(Dimen d);	// presumably the D* kind part of d.kindspec
extern int			dimenspec(Dimen d);	// presumably the value part of d.kindspec
extern void		freedocinfo(Docinfo* d);
extern void		freeitems(Item* ithead);
// mtype and chset presumably take the media type and charset codes
// declared in this header; *pdi presumably receives the document's Docinfo.
extern Item*		parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
extern void		printitems(Item* items, char* msg);
extern int			targetid(Rune* s);	// presumably name -> target id (cf. FTtop, etc.)
extern Rune*		targetname(int targid);	// presumably the inverse of targetid
extern int			validitems(Item* i);

// Tells the Plan 9 compiler's format checker that verb I takes an Item*.
#pragma varargck	type "I"	Item*

// Control print output
// These are declarations only; the variables are defined elsewhere.
// NOTE(review): dbglex and dbgbuild presumably control debug output for
// the LEX and BUILD parts respectively -- confirm.
extern int			warn;
extern int			dbglex;
extern int			dbgbuild;

// To be provided by the caller of the library.
// Neither function may return if it cannot obtain the memory.
// emalloc must return zeroed memory.
extern void*	emalloc(ulong size);
extern void*	erealloc(void* p, ulong size);

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].