/*
 * Plan 9 from Bell Labs’s /usr/web/sources/contrib/rsc/86a/parse.c
 *
 * Copyright © 2021 Plan 9 Foundation.
 * Distributed under the MIT License.
 * Download the Plan 9 distribution.
 */


#include "all.h"

/*
 * Return a pointer to the first non-whitespace character at or
 * after s (possibly the terminating NUL).  The string itself is
 * not modified.
 */
static char*
skip(char *s)
{
	/*
	 * Go through unsigned char: the text may hold bytes with the
	 * high bit set, and a negative value handed to isspace is
	 * undefined in standard C.
	 */
	while(isspace((unsigned char)*s))
		s++;
	return s;
}

/*
 * Split the next whitespace-delimited token off the text at *ps.
 *
 * Leading whitespace is skipped.  If nothing but whitespace is
 * left, nil is returned and *ps is not changed.  Otherwise the
 * token is NUL-terminated in place (the buffer is modified), *ps
 * is moved to the first non-whitespace character after it, and a
 * pointer to the token, which lives inside the original buffer,
 * is returned.
 */
static char*
token(char **ps)
{
	char *s, *t;

	s = skip(*ps);
	if(*s == '\0')
		return nil;

	/*
	 * Cast before calling isspace: bytes with the high bit set
	 * are negative as plain char on signed-char machines, which
	 * standard ctype functions do not accept.
	 */
	for(t = s; *t != '\0' && !isspace((unsigned char)*t); t++)
		;
	if(*t != '\0'){
		*t++ = '\0';
		t = skip(t);
	}
	*ps = t;
	return s;
}

/*
 * Split the next comma-separated argument off the text at *ps.
 *
 * Leading whitespace is skipped; nil is returned (and *ps left
 * alone) when only whitespace remains.  Otherwise the argument
 * runs up to the next ',' or the end of the string.  A ',' is
 * overwritten with NUL and *ps is moved past it and any
 * whitespace that follows.  Commas inside parentheses are not
 * treated specially, and whitespace before the comma stays in
 * the returned string.
 */
static char*
arg(char **ps)
{
	char *start, *end;

	start = skip(*ps);
	if(*start == '\0')
		return nil;

	for(end = start; *end != '\0' && *end != ','; end++)
		;
	if(*end == ','){
		*end = '\0';
		end = skip(end+1);
	}
	*ps = end;
	return start;
}

/*
 * Like strchr, but only matches c outside parentheses.
 *
 * Scans s left to right keeping a count of unclosed '('.  The
 * first c seen while that count is zero is returned.  The scan
 * gives up (returning nil) at the end of the string or as soon
 * as a ')' closes more parentheses than were opened.
 */
static char*
strchrp(char *s, int c)
{
	int depth;

	depth = 0;
	while(depth >= 0 && *s != '\0'){
		if(depth == 0 && *s == c)
			return s;
		switch(*s){
		case '(':
			depth++;
			break;
		case ')':
			depth--;
			break;
		}
		s++;
	}
	return nil;
}

/*
 * One row of a name <-> number table (registers, prefixes,
 * opcodes).  The tables are initialized positionally, so the
 * field order matters.
 */
typedef struct T {
	char *s;	/* name as written in the source text */
	int n;		/* numeric code paired with that name */
} T;

/*
 * Look s up by name in the first nt rows of table t.
 * Returns the number of the first row whose name equals s,
 * or -1 when there is no such row.
 */
static int
findnum(T *t, int nt, char *s)
{
	for(; nt > 0; nt--, t++){
		if(strcmp(t->s, s) != 0)
			continue;
		return t->n;
	}
	return -1;
}

/*
 * Reverse of findnum: return the name of the first of the nt
 * rows of table t whose number is n, or nil when none matches.
 */
static char*
findname(T *t, int nt, int n)
{
	for(; nt > 0; nt--, t++){
		if(t->n != n)
			continue;
		return t->s;
	}
	return nil;
}

static T regs[] = {
	"AX",		RAX,
	"CX",		RCX,
	"DX",	RDX,
	"BX",		RBX,
	"SP",		RSP,
	"BP",		RBP,
	"SI",		RSI,
	"DI",		RDI,
	"ES",		RES,
	"CS",		RCS,
	"SS",		RSS,
	"DS",		RDS,
	"FS",		RFS,
	"GS",		RGS,

	"AL",		NREG+RAL,
	"CL",		NREG+RCL,
	"DL",		NREG+RDL,
	"BL",		NREG+RBL,
	"AH",	NREG+RAH,
	"CH",	NREG+RCH,
	"DH",	NREG+RDH,
	"BH",	NREG+RBH,
};


/*
 * Map a register name to its entry in regs.
 * Returns the table value (byte registers come back with NREG
 * added) or -1 if s is not a register name.
 */
static int
regname(char *s)
{
	int code;

	code = findnum(regs, nelem(regs), s);
	return code;
}

/*
 * Return the name of register n of operand size sz, or nil if
 * the table has no such entry.  Size 8 selects the byte
 * registers, which sit in regs at an offset of NREG.
 */
static char*
regstr(int n, int sz)
{
	if(sz == 8)
		n += NREG;
	return findname(regs, nelem(regs), n);
}

static T prefixes[] = {
	/* BUG: add size prefixes */
	"LOCK",	PLOCK,
	"REPN",	PREPN,
	"REP",	PREP,
	"ES",		PES,
	"CS",		PCS,
	"SS",		PSS,
	"DS",		PDS,
};

/*
 * Return the prefix code named by s, or -1 if s is not a prefix.
 */
static int
findprefix(char *s)
{
	int code;

	code = findnum(prefixes, nelem(prefixes), s);
	return code;
}

/*
 * Return the name of prefix code n, or nil if n is not in the
 * prefixes table.
 */
static char*
prefixstr(int n)
{
	char *nm;

	nm = findname(prefixes, nelem(prefixes), n);
	return nm;
}

static T opcodes[] = {
	"AAA",	OAAA,
	"AAD",	OAAD,
	"AAM",	OAAM,
	"AAS",	OAAS,
	"ADC",	OADC,
	"ADD",	OADD,
	"AND",	OAND,
	"ARPL",	OARPL,
	"ASIZE",	OASIZE,
	"BOUND",	OBOUND,
	"BT",	OBT,
	"BTS",	OBTS,
	"BYTE",	OBYTE,
	"CALL",	OCALL,
	"CBW",	OCBW,
	"CLC",	OCLC,
	"CLD",	OCLD,
	"CLI",	OCLI,
	"CMC",	OCMC,
	"MOVOS",	OCMOVOS,
	"MOVOC",	OCMOVOC,
	"MOVCS", OCMOVCS,
	"MOVCC", OCMOVCC,
	"MOVEQ", OCMOVEQ,
	"MOVNE", OCMOVNE,
	"MOVLS", OCMOVLS,
	"MOVHI", OCMOVHI,
	"MOVMI", OCMOVMI,
	"MOVPL", OCMOVPL,
	"MOVPS", OCMOVPS,
	"MOVPC", OCMOVPC,
	"MOVLT", OCMOVLT,
	"MOVGE", OCMOVGE,
	"MOVLE", OCMOVLE,
	"MOVGT", OCMOVGT,
	"CMP",	OCMP,
	"CMPS",	OCMPS,
	"CWD",	OCWD,
	"DAA",	ODAA,
	"DAS",	ODAS,
	"DEC",	ODEC,
	"DIV",	ODIV,
	"ENTER",	OENTER,
	"HLT",	OHLT,
	"IDIV",	OIDIV,
	"IMUL",	OIMUL,
	"IN",	OIN,
	"INC",	OINC,
	"INS",	OINS,
	"INT",	OINT,
	"IRET",	OIRET,
	"JCXZ", OJCXZ,
	"JMP", OJMP,
	"JOS", OJOS,
	"JOC", OJOC,
	"JCS", OJCS,
	"JCC", OJCC,
	"JEQ", OJEQ,
	"JNE", OJNE,
	"JLS", OJLS,
	"JHI", OJHI,
	"JMI", OJMI,
	"JPL", OJPL,
	"JPS", OJPS,
	"JPC", OJPC,
	"JLT", OJLT,
	"JGE", OJGE,
	"JLE", OJLE,
	"JGT", OJGT,
	"LABEL",	OLABEL,
	"LAHF",	OLAHF,
	"LEA",	OLEA,
	"LEAVE",	OLEAVE,
	"LFP",	OLFP,
	"LODS",	OLODS,
	"LOOP",	OLOOP,
	"LOOPNZ",	OLOOPNZ,
	"LOOPZ",	OLOOPZ,
	"MOV",	OMOV,
	"MOVS",	OMOVS,
	"MUL",	OMUL,
	"NEG",	ONEG,
	"NOP",	ONOP,
	"NOT",	ONOT,
	"OR",	OOR,
	"OSIZE",	OOSIZE,
	"OUT",	OOUT,
	"OUTS",	OOUTS,
	"POP",	OPOP,
	"POPA",	OPOPA,
	"POPF",	OPOPF,
	"PUSH",	OPUSH,
	"PUSHA",	OPUSHA,
	"PUSHF",	OPUSHF,
	"RCL",	ORCL,
	"RCR",	ORCR,
	"RET",	ORET,
	"RETF",	ORETF,
	"ROL",	OROL,
	"ROR",	OROR,
	"SAHF",	OSAHF,
	"SAR",	OSAR,
	"SBB",	OSBB,
	"SCAS",	OSCAS,
	"SETOS", OSETOS,
	"SETOC", OSETOC,
	"SETCS", OSETCS,
	"SETCC", OSETCC,
	"SETEQ", OSETEQ,
	"SETNE", OSETNE,
	"SETLS", OSETLS,
	"SETHI", OSETHI,
	"SETMI", OSETMI,
	"SETPL", OSETPL,
	"SETPS", OSETPS,
	"SETPC", OSETPC,
	"SETLT", OSETLT,
	"SETGE", OSETGE,
	"SETLE", OSETLE,
	"SETGT", OSETGT,
	"SHL",	OSHL,
	"SHLD",	OSHLD,
	"SHR",	OSHR,
	"SHRD",	OSHRD,
	"STC",	OSTC,
	"STD",	OSTD,
	"STI",	OSTI,
	"STRING",	OSTRING,
	"STOS",	OSTOS,
	"SUB",	OSUB,
	"TEST",	OTEST,
	"WAIT",	OWAIT,
	"XCHG",	OXCHG,
	"XLAT",	OXLAT,
	"XOR",	OXOR,
};

/*
 * Return the opcode number for mnemonic s, or -1 if s is not in
 * the opcodes table.
 */
static int
isopcode(char *s)
{
	int op;

	op = findnum(opcodes, nelem(opcodes), s);
	return op;
}

/*
 * Return the mnemonic for opcode number n, or nil if n is not in
 * the opcodes table.
 */
static char*
opname(int n)
{
	char *nm;

	nm = findname(opcodes, nelem(opcodes), n);
	return nm;
}

/*
 * Read an identifier from the text at *ps.
 *
 * Leading whitespace is skipped.  An identifier starts with a
 * letter, '_' or a byte with the high bit set, and continues
 * with any of those or digits.  On success a freshly allocated,
 * NUL-terminated copy is returned and *ps is moved just past the
 * identifier; the caller owns the copy.  If the text does not
 * start with an identifier, nil is returned and *ps is unchanged.
 */
static char*
name(char **ps)
{
	char *start, *s, *t;
	int c, n;

	start = skip(*ps);
	s = start;
	/* classify bytes as unsigned so high-bit bytes never reach ctype as negative values */
	c = (unsigned char)*s;
	if(!isalpha(c) && c != '_' && !(c&0x80))
		return nil;

	s++;
	for(;;){
		c = (unsigned char)*s;
		if(!isalpha(c) && !isdigit(c) && c != '_' && !(c&0x80))
			break;
		s++;
	}

	/*
	 * Copy from the identifier's first character, not from *ps:
	 * measuring from *ps would pull any skipped whitespace into
	 * the name.
	 */
	n = s - start;
	t = emalloc(n+1);
	memmove(t, start, n);
	t[n] = '\0';
	*ps = s;
	return t;
}

/*
 * Parse one expression from the text at *ps and return its tree,
 * or nil if the text is empty or cannot be parsed.  On success
 * *ps is moved past the text that was consumed.  The buffer is
 * modified: a binary operator found outside parentheses is
 * overwritten with NUL so each side can be parsed as a string of
 * its own.
 *
 * Forms are tried in this order:
 *	( expr )
 *	* expr			EINDIRECT
 *	expr + expr		EADD
 *	expr - expr		ESUB
 *	expr * expr		EMUL
 *	number			ECONST (strtol, base 0)
 *	register		EREG, sz 8 or 16
 *	name			ENAME
 *
 * NOTE(review): binary operators split at the first occurrence
 * outside parentheses and parse the right side recursively, so
 * "a-b-c" groups as a-(b-c), and text starting with '+' or '-'
 * fails because the left operand is empty.  Text starting with
 * '(' returns right after the matching ')', so "(a)+b" leaves
 * "+b" unconsumed.  These limits are inherited, not introduced
 * here.
 */
Expr*
_parseexpr(char **ps)
{
	Expr *e, *ee, *l, *r;
	char *s, *t;
	int op, x;

	s = skip(*ps);
	if(*s == '\0')
		return nil;

	if(s[0]=='('){
		/*
		 * Step over the '(' before recursing; otherwise the
		 * recursive call sees the same '(' and never returns.
		 */
		s++;
		e = _parseexpr(&s);
		if(e == nil)
			return nil;
		s = skip(s);	/* allow blanks before the ')' */
		if(*s != ')')
			return nil;
		*ps = s+1;
		return e;
	}

	if(s[0]=='*'){
		/* likewise, consume the '*' before parsing its operand */
		s++;
		e = _parseexpr(&s);
		if(e == nil)
			return nil;
		*ps = s;
		ee = emalloc(sizeof(*ee));
		ee->op = EINDIRECT;
		ee->arg[0] = e;
		return ee;
	}

	/* first operator present outside parentheses, in the order + - * */
	op = EADD;
	t = strchrp(s, '+');
	if(t == nil){
		op = ESUB;
		t = strchrp(s, '-');
	}
	if(t == nil){
		op = EMUL;
		t = strchrp(s, '*');
	}
	if(t != nil){
		*t++ = '\0';
		l = _parseexpr(&s);
		/* the left side must use up everything before the operator */
		if(l == nil || *skip(s) != '\0')
			return nil;
		r = _parseexpr(&t);
		if(r == nil)
			return nil;
		ee = emalloc(sizeof(*ee));
		ee->con = 0xDADADA;	/* make sure we use 2-byte immediates for unknown constants */
		ee->op = op;
		ee->arg[0] = l;
		ee->arg[1] = r;
		*ps = t;
		/* hand back the operator node itself, not just its right operand */
		return ee;
	}

	x = strtol(s, &t, 0);
	if(t != s){
		ee = emalloc(sizeof(*ee));
		ee->op = ECONST;
		ee->con = x;
		*ps = t;
		return ee;
	}

	t = name(&s);
	if(t == nil)
		return nil;
	*ps = s;

	ee = emalloc(sizeof(*ee));
	ee->con = 0xDADADA;	/* make sure we use 2-byte immediates for unknown constants */
	if((x = regname(t)) >= 0){
		/* the node keeps only the register number, so the name copy can go */
		free(t);
		ee->op = EREG;
		if(x >= NREG){
			/* byte registers are tabled at NREG+n */
			ee->sz = 8;
			ee->reg = x-NREG;
		}else{
			ee->sz = 16;
			ee->reg = x;
		}
		return ee;
	}

	ee->op = ENAME;
	ee->s = t;	/* the node takes over the allocated name */
	return ee;
}

/*
 * Parse the whole of s as a single expression.
 * Returns the expression tree, or nil if s cannot be parsed or
 * anything other than whitespace follows the expression.
 * The string s is modified during parsing.
 */
Expr*
parseexpr(char *s)
{
	Expr *tree;

	tree = _parseexpr(&s);
	if(tree == nil)
		return nil;
	if(*skip(s) != '\0')
		return nil;
	return tree;
}

/*
 * Parse one source line into an Inst.
 *
 * A line is either "label:" or any number of prefixes, an opcode
 * and a comma-separated list of argument expressions.  Returns
 * nil for a line holding only whitespace.  Errors are reported
 * through parseerror.  The line buffer s is modified, and a
 * label's name points into it rather than being copied.
 *
 * NOTE(review): the pref/arg counters are never set here, so
 * this relies on emalloc returning zeroed memory, and the code
 * following each parseerror call assumes parseerror does not
 * return -- confirm both.
 */
Inst*
parseinst(char *s)
{
	char *t, *tt;
	Inst *i;
	int n, p;
	Expr *e;

	t = token(&s);
	if(t == nil)
		return nil;

	/* allocate only now, so a blank line does not leak an Inst */
	i = emalloc(sizeof(Inst));
	n = strlen(t);	/* token never returns an empty string, so n >= 1 */
	if(t[n-1] == ':'){
		i->op = OLABEL;
		t[n-1] = '\0';
		i->label = t;
		return i;
	}

	/*
	 * Stop as soon as the line runs out, so findprefix is never
	 * handed nil.  Test against findnum's -1 sentinel (>= 0)
	 * so a prefix whose code happens to be 0 is still accepted.
	 */
	while(t != nil && (p = findprefix(t)) >= 0){
		if(i->npref >= nelem(i->pref))
			parseerror("too many prefixes");
		i->pref[i->npref++] = p;
		t = token(&s);
	}
	if(t == nil)
		parseerror("prefixes with no instruction");

	if((i->op = isopcode(t)) < 0)
		parseerror("unrecognized opcode '%s'", t);

	while(t = arg(&s)){
		if(i->narg >= nelem(i->arg))
			parseerror("too many arguments");
		/* parseexpr scribbles on t; keep a clean copy for the message */
		tt = estrdup(t);
		if((e = parseexpr(t)) == nil)
			parseerror("cannot parse argument '%s'", tt);
		free(tt);
		i->arg[i->narg++] = e;
	}

	return i;
}

/*
 * Format routine for an Inst* argument taken from fmt->args.
 *
 * A nil pointer prints as "<nil inst>" and a label as "name:".
 * Anything else prints as its prefixes (each followed by a
 * space), the opcode name, then the arguments: a space before
 * the first and ", " between the rest, each printed with %E.
 * Returns 0 on the instruction path; the nil and label paths
 * return whatever the underlying print call returns.
 */
int
instfmt(Fmt *fmt)
{
	int k;
	Inst *in;

	in = va_arg(fmt->args, Inst*);
	if(in == nil)
		return fmtstrcpy(fmt, "<nil inst>");
	if(in->op == OLABEL)
		return fmtprint(fmt, "%s:", in->label);

	k = 0;
	while(k < in->npref){
		fmtprint(fmt, "%s ", prefixstr(in->pref[k]));
		k++;
	}

	fmtprint(fmt, "%s", opname(in->op));

	for(k = 0; k < in->narg; k++){
		if(k > 0)
			fmtprint(fmt, ", ");
		else
			fmtprint(fmt, " ");
		fmtprint(fmt, "%E", in->arg[k]);
	}
	return 0;
}

/*
 * Format routine for an Expr* argument taken from fmt->args.
 *
 * A nil pointer prints as "<nil expr>".  Registers and names
 * print as their text, indirection as *(x), and the binary
 * operators fully parenthesized.  Constants strictly between
 * -10 and 10 print in decimal, all others in hex with a 0x
 * prefix.  An unknown op prints as "<expr op N>".
 * Returns the result of the print call that was made.
 */
int
exprfmt(Fmt *fmt)
{
	Expr *x;

	x = va_arg(fmt->args, Expr*);
	if(x == nil)
		return fmtprint(fmt, "<nil expr>");

	switch(x->op){
	case ECONST:
		if(x->con <= -10 || x->con >= 10)
			return fmtprint(fmt, "%#x", x->con);
		return fmtprint(fmt, "%d", x->con);
	case EREG:
		return fmtstrcpy(fmt, regstr(x->reg, x->sz));
	case ENAME:
		return fmtstrcpy(fmt, x->s);
	case EINDIRECT:
		return fmtprint(fmt, "*(%E)", x->arg[0]);
	case EADD:
		return fmtprint(fmt, "(%E+%E)", x->arg[0], x->arg[1]);
	case ESUB:
		return fmtprint(fmt, "(%E-%E)", x->arg[0], x->arg[1]);
	case EMUL:
		return fmtprint(fmt, "(%E*%E)", x->arg[0], x->arg[1]);
	default:
		return fmtprint(fmt, "<expr op %d>", x->op);
	}
}


/*
 * Bell Labs OSI certified Powered by Plan 9
 *
 * (Return to Plan 9 Home Page)
 *
 * Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
 * Comments to [email protected].
 */