#include "all.h"
/*
 * Return a pointer to the first non-whitespace character at or
 * after s; this may be the terminating NUL.
 */
static char*
skip(char *s)
{
	/*
	 * Cast before calling isspace: a plain char holding a byte
	 * with the high bit set may be negative, and passing a
	 * negative value (other than EOF) to isspace is undefined.
	 */
	while(isspace((unsigned char)*s))
		s++;
	return s;
}
/*
 * Split the next whitespace-delimited token off *ps.
 *
 * The token is NUL-terminated in place (the input buffer is modified)
 * and *ps is advanced to the first non-blank character after it.
 * Returns nil, leaving *ps unchanged, when only whitespace remains.
 */
static char*
token(char **ps)
{
	char *s, *t;

	s = skip(*ps);
	if(*s == '\0')
		return nil;

	/* cast: isspace on a negative char value is undefined */
	for(t = s; *t != '\0' && !isspace((unsigned char)*t); t++)
		;
	if(*t != '\0'){
		*t++ = '\0';
		t = skip(t);
	}
	*ps = t;
	return s;
}
/*
 * Split the next comma-separated field off *ps.
 *
 * Leading whitespace is skipped, the field is NUL-terminated in place
 * at the comma, and *ps is advanced past the comma and any blanks
 * that follow it.  Returns nil when nothing but whitespace remains.
 * An empty field (a comma with nothing before it) yields "".
 */
static char*
arg(char **ps)
{
	char *start, *end;

	start = skip(*ps);
	if(*start == '\0')
		return nil;

	for(end = start; *end != '\0' && *end != ','; end++)
		;
	if(*end == '\0'){
		/* last field: nothing to cut */
		*ps = end;
		return start;
	}
	*end = '\0';
	*ps = skip(end+1);
	return start;
}
/*
 * Like strchr, but only matches c outside parentheses.
 *
 * Returns a pointer to the first occurrence of c in s at nesting
 * depth zero, or nil if there is none.  The scan stops early if a
 * ')' closes more groups than were opened.
 */
static char*
strchrp(char *s, int c)
{
	int depth;

	depth = 0;
	while(depth >= 0 && *s != '\0'){
		/* the match test runs before the depth update for this char */
		if(depth == 0 && *s == c)
			return s;
		switch(*s){
		case '(':
			depth++;
			break;
		case ')':
			depth--;
			break;
		}
		s++;
	}
	return nil;
}
/*
 * One entry of a name <-> number lookup table (see findnum, findname).
 * Field order matters: the tables below use positional initializers.
 */
typedef struct T {
	char *s;	/* textual name */
	int n;		/* associated number */
} T;
/*
 * Look up the name s in the first nt entries of table t.
 * Returns the number of the first entry whose name equals s,
 * or -1 if no entry matches.
 */
static int
findnum(T *t, int nt, char *s)
{
	for(; nt > 0; nt--, t++){
		if(strcmp(t->s, s) == 0)
			return t->n;
	}
	return -1;
}
/*
 * Look up the number n in the first nt entries of table t.
 * Returns the name of the first entry whose number equals n,
 * or nil if no entry matches.
 */
static char*
findname(T *t, int nt, int n)
{
	for(; nt > 0; nt--, t++){
		if(t->n == n)
			return t->s;
	}
	return nil;
}
static T regs[] = {
"AX", RAX,
"CX", RCX,
"DX", RDX,
"BX", RBX,
"SP", RSP,
"BP", RBP,
"SI", RSI,
"DI", RDI,
"ES", RES,
"CS", RCS,
"SS", RSS,
"DS", RDS,
"FS", RFS,
"GS", RGS,
"AL", NREG+RAL,
"CL", NREG+RCL,
"DL", NREG+RDL,
"BL", NREG+RBL,
"AH", NREG+RAH,
"CH", NREG+RCH,
"DH", NREG+RDH,
"BH", NREG+RBH,
};
/*
 * Map a register name to its number in regs.
 * Returns -1 if s is not a register name.
 */
static int
regname(char *s)
{
	int n;

	n = findnum(regs, nelem(regs), s);
	return n;
}
/*
 * Map register number n with operand size sz back to its name.
 * A size of 8 selects the entries stored at NREG+n in regs.
 * Returns nil if there is no such entry.
 */
static char*
regstr(int n, int sz)
{
	if(sz == 8)
		n += NREG;
	return findname(regs, nelem(regs), n);
}
static T prefixes[] = {
/* BUG: add size prefixes */
"LOCK", PLOCK,
"REPN", PREPN,
"REP", PREP,
"ES", PES,
"CS", PCS,
"SS", PSS,
"DS", PDS,
};
/*
 * Map a prefix name to its number in prefixes.
 * Returns -1 if s is not a prefix name.
 */
static int
findprefix(char *s)
{
	int n;

	n = findnum(prefixes, nelem(prefixes), s);
	return n;
}
/*
 * Map a prefix number back to its name.
 * Returns nil if n is not in prefixes.
 */
static char*
prefixstr(int n)
{
	char *s;

	s = findname(prefixes, nelem(prefixes), n);
	return s;
}
static T opcodes[] = {
"AAA", OAAA,
"AAD", OAAD,
"AAM", OAAM,
"AAS", OAAS,
"ADC", OADC,
"ADD", OADD,
"AND", OAND,
"ARPL", OARPL,
"ASIZE", OASIZE,
"BOUND", OBOUND,
"BT", OBT,
"BTS", OBTS,
"BYTE", OBYTE,
"CALL", OCALL,
"CBW", OCBW,
"CLC", OCLC,
"CLD", OCLD,
"CLI", OCLI,
"CMC", OCMC,
"MOVOS", OCMOVOS,
"MOVOC", OCMOVOC,
"MOVCS", OCMOVCS,
"MOVCC", OCMOVCC,
"MOVEQ", OCMOVEQ,
"MOVNE", OCMOVNE,
"MOVLS", OCMOVLS,
"MOVHI", OCMOVHI,
"MOVMI", OCMOVMI,
"MOVPL", OCMOVPL,
"MOVPS", OCMOVPS,
"MOVPC", OCMOVPC,
"MOVLT", OCMOVLT,
"MOVGE", OCMOVGE,
"MOVLE", OCMOVLE,
"MOVGT", OCMOVGT,
"CMP", OCMP,
"CMPS", OCMPS,
"CWD", OCWD,
"DAA", ODAA,
"DAS", ODAS,
"DEC", ODEC,
"DIV", ODIV,
"ENTER", OENTER,
"HLT", OHLT,
"IDIV", OIDIV,
"IMUL", OIMUL,
"IN", OIN,
"INC", OINC,
"INS", OINS,
"INT", OINT,
"IRET", OIRET,
"JCXZ", OJCXZ,
"JMP", OJMP,
"JOS", OJOS,
"JOC", OJOC,
"JCS", OJCS,
"JCC", OJCC,
"JEQ", OJEQ,
"JNE", OJNE,
"JLS", OJLS,
"JHI", OJHI,
"JMI", OJMI,
"JPL", OJPL,
"JPS", OJPS,
"JPC", OJPC,
"JLT", OJLT,
"JGE", OJGE,
"JLE", OJLE,
"JGT", OJGT,
"LABEL", OLABEL,
"LAHF", OLAHF,
"LEA", OLEA,
"LEAVE", OLEAVE,
"LFP", OLFP,
"LODS", OLODS,
"LOOP", OLOOP,
"LOOPNZ", OLOOPNZ,
"LOOPZ", OLOOPZ,
"MOV", OMOV,
"MOVS", OMOVS,
"MUL", OMUL,
"NEG", ONEG,
"NOP", ONOP,
"NOT", ONOT,
"OR", OOR,
"OSIZE", OOSIZE,
"OUT", OOUT,
"OUTS", OOUTS,
"POP", OPOP,
"POPA", OPOPA,
"POPF", OPOPF,
"PUSH", OPUSH,
"PUSHA", OPUSHA,
"PUSHF", OPUSHF,
"RCL", ORCL,
"RCR", ORCR,
"RET", ORET,
"RETF", ORETF,
"ROL", OROL,
"ROR", OROR,
"SAHF", OSAHF,
"SAR", OSAR,
"SBB", OSBB,
"SCAS", OSCAS,
"SETOS", OSETOS,
"SETOC", OSETOC,
"SETCS", OSETCS,
"SETCC", OSETCC,
"SETEQ", OSETEQ,
"SETNE", OSETNE,
"SETLS", OSETLS,
"SETHI", OSETHI,
"SETMI", OSETMI,
"SETPL", OSETPL,
"SETPS", OSETPS,
"SETPC", OSETPC,
"SETLT", OSETLT,
"SETGE", OSETGE,
"SETLE", OSETLE,
"SETGT", OSETGT,
"SHL", OSHL,
"SHLD", OSHLD,
"SHR", OSHR,
"SHRD", OSHRD,
"STC", OSTC,
"STD", OSTD,
"STI", OSTI,
"STRING", OSTRING,
"STOS", OSTOS,
"SUB", OSUB,
"TEST", OTEST,
"WAIT", OWAIT,
"XCHG", OXCHG,
"XLAT", OXLAT,
"XOR", OXOR,
};
/*
 * Map an opcode mnemonic to its number in opcodes.
 * Returns -1 if s is not a known mnemonic.
 */
static int
isopcode(char *s)
{
	int op;

	op = findnum(opcodes, nelem(opcodes), s);
	return op;
}
/*
 * Map an opcode number back to its mnemonic.
 * Returns nil if n is not in opcodes.
 */
static char*
opname(int n)
{
	char *s;

	s = findname(opcodes, nelem(opcodes), n);
	return s;
}
/*
 * Parse an identifier from *ps.
 *
 * After skipping leading whitespace, an identifier starts with a
 * letter, '_' or a byte with the high bit set, and continues with
 * letters, digits, '_' or high-bit bytes.  On success returns a
 * freshly emalloc'ed, NUL-terminated copy and advances *ps past the
 * identifier.  Returns nil, leaving *ps unchanged, if no identifier
 * starts there.
 */
static char*
name(char **ps)
{
	char *s, *e, *t;

	s = skip(*ps);
	/* casts: ctype functions are undefined on negative char values */
	if(!isalpha((unsigned char)*s) && *s != '_' && !(*s&0x80))
		return nil;
	e = s+1;
	while(isalnum((unsigned char)*e) || *e=='_' || (*e&0x80))
		e++;

	/*
	 * Copy from s, the start of the identifier, not from *ps:
	 * *ps may still point at the leading whitespace, which would
	 * otherwise be copied into the name and truncate its tail.
	 */
	t = emalloc(e-s+1);
	memmove(t, s, e-s);
	t[e-s] = '\0';
	*ps = e;
	return t;
}
/*
 * Parse one expression from *ps, advancing *ps past the text consumed.
 * Returns nil on empty or malformed input.  The input buffer is
 * modified: binary operators are overwritten with NUL to split it.
 *
 * Forms tried, in order:
 *	( expr )
 *	* expr			indirection (EINDIRECT)
 *	expr + expr		EADD
 *	expr - expr		ESUB
 *	expr * expr		EMUL
 *	number			ECONST, parsed by strtol with base 0
 *	name			EREG if it is a register name, else ENAME
 *
 * Binary operators are located with strchrp, so only operators outside
 * parentheses split the string.
 *
 * NOTE(review): the split is made at the first top-level occurrence of
 * the operator, so "a-b-c" groups as a-(b-c); confirm that is intended.
 */
Expr*
_parseexpr(char **ps)
{
	Expr *e, *ee;
	char *s, *t;
	int x;

	s = skip(*ps);
	if(*s == '\0')
		return nil;

	if(s[0]=='('){
		/*
		 * Step over the '(' before recursing; otherwise the
		 * recursive call sees the same '(' and never terminates.
		 */
		s++;
		e = _parseexpr(&s);
		if(e == nil)
			return nil;
		s = skip(s);	/* allow blanks before the closing paren */
		if(*s != ')')
			return nil;
		*ps = s+1;
		return e;
	}

	if(s[0]=='*'){
		/* likewise step over the '*' so the recursion makes progress */
		s++;
		e = _parseexpr(&s);
		if(e == nil)
			return nil;
		*ps = s;
		ee = emalloc(sizeof(*ee));
		ee->op = EINDIRECT;
		ee->arg[0] = e;
		return ee;
	}

	ee = emalloc(sizeof(*ee));
	ee->con = 0xDADADA;	/* make sure we use 2-byte immediates for unknown constants */

	/* look for a top-level binary operator: '+', then '-', then '*' */
	t = strchrp(s, '+');
	if(t != nil)
		ee->op = EADD;
	else if((t = strchrp(s, '-')) != nil)
		ee->op = ESUB;
	else if((t = strchrp(s, '*')) != nil)
		ee->op = EMUL;
	if(t != nil){
		*t++ = '\0';
		/* the left operand must consume everything before the operator */
		e = _parseexpr(&s);
		if(e==nil || *skip(s) != '\0')
			return nil;
		ee->arg[0] = e;
		e = _parseexpr(&t);
		if(e==nil)
			return nil;
		ee->arg[1] = e;
		*ps = t;
		/* return the operator node, not its right operand */
		return ee;
	}

	x = strtol(s, &t, 0);
	if(t != s){
		*ps = t;
		ee->op = ECONST;
		ee->con = x;
		return ee;
	}

	t = name(&s);
	if(t == nil)
		return nil;
	*ps = s;
	if((x = regname(t)) >= 0){
		ee->op = EREG;
		/* table numbers at or above NREG denote the 8-bit names */
		if(x >= NREG){
			ee->sz = 8;
			ee->reg = x-NREG;
		}else{
			ee->sz = 16;
			ee->reg = x;
		}
		return ee;
	}
	ee->op = ENAME;
	ee->s = t;
	return ee;
}
/*
 * Parse the whole string s as a single expression.
 * Returns nil if s does not parse or if anything other than
 * whitespace follows the expression.  s is modified in place.
 */
Expr*
parseexpr(char *s)
{
	Expr *e;

	e = _parseexpr(&s);
	if(e != nil && *skip(s) == '\0')
		return e;
	return nil;
}
/*
 * Parse one source line into an Inst.
 *
 * A line is either "label:" or zero or more prefixes, an opcode, and
 * a comma-separated argument list.  Returns nil for a blank line.
 * Problems are reported through parseerror.  s is modified in place,
 * and a label's name points into s rather than being copied.
 */
Inst*
parseinst(char *s)
{
	char *t, *tt;
	Inst *i;
	int p, n;
	Expr *e;

	/* check for a blank line before allocating, so nothing is leaked */
	t = token(&s);
	if(t == nil)
		return nil;
	i = emalloc(sizeof(Inst));

	/* token never returns an empty string, so n >= 1 */
	n = strlen(t);
	if(t[n-1] == ':'){
		i->op = OLABEL;
		t[n-1] = '\0';
		i->label = t;
		return i;
	}

	/*
	 * Collect prefixes.  Stop when the tokens run out, so that
	 * findprefix is never handed nil, and treat any non-negative
	 * result as a hit: findnum reports a miss with -1.
	 */
	while(t != nil && (p = findprefix(t)) >= 0){
		if(i->npref >= nelem(i->pref))
			parseerror("too many prefixes");
		i->pref[i->npref++] = p;
		t = token(&s);
	}
	if(t == nil)
		parseerror("prefixes with no instruction");
	if((i->op = isopcode(t)) < 0)
		parseerror("unrecognized opcode '%s'", t);

	while((t = arg(&s)) != nil){
		if(i->narg >= nelem(i->arg))
			parseerror("too many arguments");
		/* parseexpr cuts t up; keep a copy for the error message */
		tt = estrdup(t);
		if((e = parseexpr(t)) == nil)
			parseerror("cannot parse argument '%s'", tt);
		free(tt);
		i->arg[i->narg++] = e;
	}
	return i;
}
/*
 * Format routine for an Inst* argument taken from fmt->args.
 * Prints "label:" for a label, otherwise the prefixes, the opcode
 * name and the arguments separated by ", ".  A nil pointer prints
 * as "<nil inst>".
 */
int
instfmt(Fmt *fmt)
{
	Inst *inst;
	int k;

	inst = va_arg(fmt->args, Inst*);
	if(inst == nil)
		return fmtstrcpy(fmt, "<nil inst>");
	if(inst->op == OLABEL)
		return fmtprint(fmt, "%s:", inst->label);

	for(k = 0; k < inst->npref; k++)
		fmtprint(fmt, "%s ", prefixstr(inst->pref[k]));
	fmtprint(fmt, "%s", opname(inst->op));
	for(k = 0; k < inst->narg; k++){
		/* a space before the first argument, ", " before the rest */
		if(k > 0)
			fmtprint(fmt, ", ");
		else
			fmtprint(fmt, " ");
		fmtprint(fmt, "%E", inst->arg[k]);
	}
	return 0;
}
/*
 * Format routine for an Expr* argument taken from fmt->args.
 * Registers and names print as text, binary operations print fully
 * parenthesized, and constants print in decimal when between -9 and
 * 9 and with %#x otherwise.  A nil pointer prints as "<nil expr>".
 */
int
exprfmt(Fmt *fmt)
{
	Expr *x;

	x = va_arg(fmt->args, Expr*);
	if(x == nil)
		return fmtprint(fmt, "<nil expr>");

	switch(x->op){
	case ECONST:
		if(x->con <= -10 || x->con >= 10)
			return fmtprint(fmt, "%#x", x->con);
		return fmtprint(fmt, "%d", x->con);
	case EREG:
		return fmtstrcpy(fmt, regstr(x->reg, x->sz));
	case ENAME:
		return fmtstrcpy(fmt, x->s);
	case EINDIRECT:
		return fmtprint(fmt, "*(%E)", x->arg[0]);
	case EADD:
		return fmtprint(fmt, "(%E+%E)", x->arg[0], x->arg[1]);
	case ESUB:
		return fmtprint(fmt, "(%E-%E)", x->arg[0], x->arg[1]);
	case EMUL:
		return fmtprint(fmt, "(%E*%E)", x->arg[0], x->arg[1]);
	default:
		break;
	}
	return fmtprint(fmt, "<expr op %d>", x->op);
}
|