#include "l.h"
/*
 * Mips24k: build-time switch read by noops().
 * when nonzero, nops are inserted to prevent three consecutive stores.
 * this is a workaround for 24k erratum #48; it costs about 10% in
 * text space, so only enable it if you need it.
 * test cases are "hoc -e '7^6'" and "{ echo moon; echo plot } | scat".
 */
enum {
	Mips24k = 0
};
/*
 * return 1 if p's opcode is one of the double-word moves
 * listed below, 0 otherwise (including when p is nil).
 */
static int
isdblwrdmov(Prog *p)
{
	int wide;

	wide = 0;
	if(p != nil)
		switch(p->as){
		case AMOVD:
		case AMOVDF:
		case AMOVDW:
		case AMOVFD:
		case AMOVWD:
		case AMOVV:
		case AMOVVL:
		case AMOVVR:
		case AMOVFV:
		case AMOVDV:
		case AMOVVF:
		case AMOVVD:
			wide = 1;
			break;
		}
	return wide;
}
/*
 * return 1 if p is any kind of move: one of the opcodes listed
 * here or anything isdblwrdmov() accepts.  nil yields 0.
 */
static int
ismove(Prog *p)
{
	int mov;

	if(p == nil)
		return 0;
	switch(p->as){
	case AMOVB:
	case AMOVBU:
	case AMOVF:
	case AMOVFW:
	case AMOVH:
	case AMOVHU:
	case AMOVW:
	case AMOVWF:
	case AMOVWL:
	case AMOVWR:
	case AMOVWU:
		mov = 1;
		break;
	default:
		/* not a narrow move; it may still be a double-word one */
		mov = isdblwrdmov(p) != 0;
		break;
	}
	return mov;
}
/*
 * return 1 if p is a move whose destination operand is
 * a memory-style operand (D_OREG, D_EXTERN, D_STATIC,
 * D_AUTO or D_PARAM); 0 for anything else, including nil.
 */
static int
isstore(Prog *p)
{
	if(p == nil || !ismove(p))
		return 0;
	switch(p->to.type){
	case D_OREG:
	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return 1;
	default:
		break;
	}
	return 0;
}
/*
 * return 1 if p's opcode is one of the conditional branches
 * listed below, 0 otherwise (including when p is nil).
 */
static int
iscondbranch(Prog *p)
{
	if(p == nil)
		return 0;
	switch(p->as){
	default:
		break;
	case ABEQ:
	case ABNE:
	case ABFPF:
	case ABFPT:
	case ABGEZ:
	case ABGEZAL:
	case ABGTZ:
	case ABLEZ:
	case ABLTZ:
	case ABLTZAL:
		return 1;
	}
	return 0;
}
/*
 * return 1 if p transfers control: AJAL, AJMP, ARET, ARFE,
 * or anything iscondbranch() accepts.  nil yields 0.
 */
static int
isbranch(Prog *p)
{
	if(p == nil)
		return 0;
	if(p->as == AJAL || p->as == AJMP || p->as == ARET || p->as == ARFE)
		return 1;
	return iscondbranch(p) ? 1 : 0;
}
/*
 * set the LABEL and SYNC marks on p, then link
 * a fresh nop into the list directly after it.
 */
static void
nopafter(Prog *p)
{
	p->mark = p->mark | LABEL | SYNC;
	addnop(p);
}
/*
 * workaround for 24k erratum #48, costs about 0.5% in space.
 * p and its successor must both be stores for anything to happen.
 * if either of them is a double-word move, the nop goes between
 * the two; otherwise, if a third store follows, the nop goes before
 * that third store.  each inserted nop is counted in nop.store.
 * returns 1 if a nop was inserted, 0 if not.
 */
static int
no3stores(Prog *p)
{
	Prog *second, *after;

	if(!isstore(p))
		return 0;
	second = p->link;
	if(!isstore(second))
		return 0;

	/* choose the instruction the nop will follow */
	if(isdblwrdmov(p) || isdblwrdmov(second))
		after = p;
	else if(isstore(second->link))
		after = second;
	else
		return 0;

	nopafter(after);
	nop.store.count++;
	nop.store.outof++;
	return 1;
}
/*
 * set NOSCHED on every store in the list starting at firstp,
 * to keep stores out of branch delay slots.
 * this is costly in space (the other 9.5%), but makes no3stores
 * effective.  there is undoubtedly a better way to do this.
 */
void
storesnosched(void)
{
	Prog *p;

	p = firstp;
	while(p != P){
		if(isstore(p))
			p->mark |= NOSCHED;
		p = p->link;
	}
}
/*
 * put a nop after p and account for it in nop.branch.
 */
static void
nopforbranch(Prog *p)
{
	nopafter(p);
	nop.branch.count++;
	nop.branch.outof++;
}

/*
 * walk the list from firstp and insert nops so that stores
 * do not pile up (see no3stores).  returns the number of
 * places where at least one nop was inserted.
 */
int
triplestorenops(void)
{
	int added;
	Prog *p, *next;

	added = 0;
	p = firstp;
	while(p != P){
		/* remember the successor before any nop is linked in */
		next = p->link;
		if(isstore(p)){
			if(no3stores(p))
				added++;
			/*
			 * given storenosched, the next two
			 * checks shouldn't be necessary.
			 */
			/*
			 * add nop after first MOV in `MOV; Bcond; MOV'.
			 */
			else if(isbranch(next) && isstore(next->link)){
				nopforbranch(p);
				added++;
			}
			/*
			 * this may be a branch target, so insert a nop after,
			 * in case a branch leading here has a store in its
			 * delay slot and we have consecutive stores here.
			 *
			 * NOTE(review): nopafter() itself sets LABEL|SYNC on p
			 * and `next' predates the insertion, so this test can
			 * fire right after one of the insertions above and add
			 * a second nop after the same store -- confirm intended.
			 */
			if((p->mark & (LABEL|SYNC)) && !isnop(next)){
				nopforbranch(p);
				added++;
			}
		}else if(isbranch(p) && !iscondbranch(p) && next){
			/*
			 * can't ignore delay slot of a conditional branch;
			 * the branch could fail and fall through.
			 */
			next = next->link;	/* skip its delay slot */
		}
		p = next;
	}
	return added;
}
/*
 * noops: rewrite the instruction list headed by firstp in four passes.
 *  1. set LABEL/SYNC/BRANCH/LEAF marks, unlink ANOPs, retarget branches
 *     past ANOPs, and record each TEXT's frame and become sizes in its symbol.
 *  2. for AJAL instructions, enlarge the enclosing TEXT's to.offset when
 *     the largest become size exceeds that TEXT's recorded frame.
 *  3. expand each ATEXT and ARET (including the BECOME form of ARET)
 *     into explicit stack-adjust, link-register and jump instructions.
 *  4. pass runs of instructions delimited by the marks to sched().
 * when Mips24k is nonzero, storesnosched() runs before pass 4 and
 * triplestorenops() runs after it.
 */
void
noops(void)
{
Prog *p, *p1, *q, *q1;
int o, curframe, curbecome, maxbecome;
/*
* find leaf subroutines
* become sizes
* frame sizes
* strip NOPs
* expand RET
* expand BECOME pseudo
*/
if(debug['v'])
Bprint(&bso, "%5.2f noops\n", cputime());
Bflush(&bso);
curframe = 0;
curbecome = 0;
maxbecome = 0;
curtext = 0;
/* pass 1: q trails p as the most recent instruction that was kept */
q = P;
for(p = firstp; p != P; p = p->link) {
/* find out how much arg space is used in this TEXT */
if(p->to.type == D_OREG && p->to.reg == REGSP)
if(p->to.offset > curframe)
curframe = p->to.offset;
switch(p->as) {
case ATEXT:
/* a new TEXT begins: save what was gathered for the previous one */
if(curtext && curtext->from.sym) {
curtext->from.sym->frame = curframe;
curtext->from.sym->become = curbecome;
if(curbecome > maxbecome)
maxbecome = curbecome;
}
curframe = 0;
curbecome = 0;
/* every TEXT starts out marked LEAF; a call seen below clears it */
p->mark |= LABEL|LEAF|SYNC;
if(p->link)
p->link->mark |= LABEL;
curtext = p;
break;
/* too hard, just leave alone */
case AMOVW:
if(p->to.type == D_FCREG ||
p->to.type == D_MREG) {
p->mark |= LABEL|SYNC;
break;
}
/* a move from D_FCREG or D_MREG also gets two nops after it */
if(p->from.type == D_FCREG ||
p->from.type == D_MREG) {
p->mark |= LABEL|SYNC;
addnop(p);
addnop(p);
nop.mfrom.count += 2;
nop.mfrom.outof += 2;
break;
}
break;
/* too hard, just leave alone */
case ACASE:
case ASYSCALL:
case AWORD:
case ATLBWR:
case ATLBWI:
case ATLBP:
case ATLBR:
p->mark |= LABEL|SYNC;
break;
case ANOR:
/* a NOR whose destination is REGZERO has the form addnop() emits */
if(p->to.type == D_REG && p->to.reg == REGZERO)
p->mark |= LABEL|SYNC;
break;
case ARET:
/* special form of RET is BECOME */
if(p->from.type == D_CONST)
if(p->from.offset > curbecome)
curbecome = p->from.offset;
if(p->link != P)
p->link->mark |= LABEL;
break;
case ANOP:
/*
 * unlink the NOP: splice q straight to p's successor and pass
 * p's marks on to it.  `continue' skips the q = p at the bottom
 * of the loop, so q keeps naming the last instruction kept.
 * NOTE(review): q is still P if the list begins with a NOP, and
 * q1 is P if it ends with one; neither case is checked here.
 */
q1 = p->link;
q->link = q1; /* q is non-nop */
q1->mark |= p->mark;
continue;
case ABCASE:
p->mark |= LABEL|SYNC;
goto dstlab;
case ABGEZAL:
case ABLTZAL:
case AJAL:
/* these three clear LEAF on the enclosing TEXT, then fall through */
if(curtext != P)
curtext->mark &= ~LEAF;
case AJMP:
case ABEQ:
case ABGEZ:
case ABGTZ:
case ABLEZ:
case ABLTZ:
case ABNE:
case ABFPT:
case ABFPF:
p->mark |= BRANCH;
dstlab:
/*
 * move the branch target past any NOPs and mark it LABEL
 * unless it carries LEAF; with no target, mark the branch
 * itself.  the instruction after the branch is always marked.
 */
q1 = p->cond;
if(q1 != P) {
while(q1->as == ANOP) {
q1 = q1->link;
p->cond = q1;
}
if(!(q1->mark & LEAF))
q1->mark |= LABEL;
} else
p->mark |= LABEL;
q1 = p->link;
if(q1 != P)
q1->mark |= LABEL;
break;
}
q = p;
}
/* save the sizes gathered for the last TEXT in the list */
if(curtext && curtext->from.sym) {
curtext->from.sym->frame = curframe;
curtext->from.sym->become = curbecome;
if(curbecome > maxbecome)
maxbecome = curbecome;
}
if(debug['b'])
print("max become = %d\n", maxbecome);
xdefine("ALEFbecome", STEXT, maxbecome);
/*
 * pass 2: for each AJAL inside a TEXT that has a symbol and a
 * non-negative to.offset, compute how far maxbecome exceeds the
 * TEXT's recorded frame; if the call has no symbol or its symbol
 * has a nonzero become size, add the excess to the TEXT's to.offset.
 */
curtext = 0;
for(p = firstp; p != P; p = p->link) {
switch(p->as) {
case ATEXT:
curtext = p;
break;
case AJAL:
if(curtext != P && curtext->from.sym != S && curtext->to.offset >= 0) {
o = maxbecome - curtext->from.sym->frame;
if(o <= 0)
break;
/* calling a become or calling a variable */
if(p->to.sym == S || p->to.sym->become) {
curtext->to.offset += o;
if(debug['b']) {
curp = p;
print("%D calling %D increase %d\n",
&curtext->from, &p->to, o);
}
}
}
break;
}
}
/* pass 3: expand TEXT and RET into real instructions */
for(p = firstp; p != P; p = p->link) {
o = p->as;
switch(o) {
case ATEXT:
curtext = p;
/* the extra 4 is presumably the slot REGLINK is saved in below -- confirm */
autosize = p->to.offset + 4;
/* a LEAF TEXT with no frame of its own gets no stack adjustment */
if(autosize <= 4)
if(curtext->mark & LEAF) {
p->to.offset = -4;
autosize = 0;
}
/* q is the instruction the REGLINK save will follow */
q = p;
if(autosize) {
/* insert an ADD of -autosize to REGSP after the TEXT */
q = prg();
q->as = AADD;
q->line = p->line;
q->from.type = D_CONST;
q->from.offset = -autosize;
q->to.type = D_REG;
q->to.reg = REGSP;
q->link = p->link;
p->link = q;
} else
if(!(curtext->mark & LEAF)) {
/* no frame to save into: treat the TEXT as LEAF from here on */
if(debug['v'])
Bprint(&bso, "save suppressed in: %s\n",
curtext->from.sym->name);
Bflush(&bso);
curtext->mark |= LEAF;
}
if(curtext->mark & LEAF) {
if(curtext->from.sym)
curtext->from.sym->type = SLEAF;
break;
}
/*
 * not LEAF: insert a MOVW of REGLINK to an OREG on REGSP after q.
 * NOTE(review): the line below clears from.offset although the
 * lines around it build q1->to; possibly to.offset was meant.
 */
q1 = prg();
q1->as = AMOVW;
q1->line = p->line;
q1->from.type = D_REG;
q1->from.reg = REGLINK;
q1->to.type = D_OREG;
q1->from.offset = 0;
q1->to.reg = REGSP;
q1->link = q->link;
q->link = q1;
break;
case ARET:
nocache(p);
if(p->from.type == D_CONST)
goto become;
if(curtext->mark & LEAF) {
if(!autosize) {
/* LEAF with no frame: the RET becomes a JMP through REGLINK */
p->as = AJMP;
p->from = zprg.from;
p->to.type = D_OREG;
p->to.offset = 0;
p->to.reg = REGLINK;
p->mark |= BRANCH;
break;
}
/* LEAF with a frame: ADD autosize to REGSP, then JMP through REGLINK */
p->as = AADD;
p->from.type = D_CONST;
p->from.offset = autosize;
p->to.type = D_REG;
p->to.reg = REGSP;
q = prg();
q->as = AJMP;
q->line = p->line;
q->to.type = D_OREG;
q->to.offset = 0;
q->to.reg = REGLINK;
q->mark |= BRANCH;
q->link = p->link;
p->link = q;
break;
}
/*
 * not LEAF: the RET becomes a MOVW from offset 0 on REGSP into
 * register 2, optionally followed by an ADD of autosize to REGSP,
 * then a JMP through register 2.
 */
p->as = AMOVW;
p->from.type = D_OREG;
p->from.offset = 0;
p->from.reg = REGSP;
p->to.type = D_REG;
p->to.reg = 2;
q = p;
if(autosize) {
q = prg();
q->as = AADD;
q->line = p->line;
q->from.type = D_CONST;
q->from.offset = autosize;
q->to.type = D_REG;
q->to.reg = REGSP;
q->link = p->link;
p->link = q;
}
q1 = prg();
q1->as = AJMP;
q1->line = p->line;
q1->to.type = D_OREG;
q1->to.offset = 0;
q1->to.reg = 2;
q1->mark |= BRANCH;
q1->link = q->link;
q->link = q1;
break;
become:
if(curtext->mark & LEAF) {
/*
 * LEAF become: a new JMP takes over p's destination and cond,
 * and p itself is turned into an ADD of autosize to REGSP.
 */
q = prg();
q->line = p->line;
q->as = AJMP;
q->from = zprg.from;
q->to = p->to;
q->cond = p->cond;
q->link = p->link;
q->mark |= BRANCH;
p->link = q;
p->as = AADD;
p->from = zprg.from;
p->from.type = D_CONST;
p->from.offset = autosize;
p->to = zprg.to;
p->to.type = D_REG;
p->to.reg = REGSP;
break;
}
/*
 * non-LEAF become: the JMP is linked in first and the ADD is then
 * linked in ahead of it, so the final order is p (rewritten as a
 * MOVW from offset 0 on REGSP into REGLINK), ADD, JMP.
 */
q = prg();
q->line = p->line;
q->as = AJMP;
q->from = zprg.from;
q->to = p->to;
q->cond = p->cond;
q->link = p->link;
q->mark |= BRANCH;
p->link = q;
q = prg();
q->line = p->line;
q->as = AADD;
q->from.type = D_CONST;
q->from.offset = autosize;
q->to.type = D_REG;
q->to.reg = REGSP;
q->link = p->link;
p->link = q;
p->as = AMOVW;
p->from = zprg.from;
p->from.type = D_OREG;
p->from.offset = 0;
p->from.reg = REGSP;
p->to = zprg.to;
p->to.type = D_REG;
p->to.reg = REGLINK;
break;
}
}
if (Mips24k)
storesnosched();
/*
 * pass 4: cut the list into blocks and give each one to sched().
 * a block ends at a NOSCHED run, before a LABEL or SYNC instruction,
 * after a BRANCH or SYNC instruction, or once it holds NSCHED
 * instructions.
 */
curtext = P;
q = P; /* p - 1 */
q1 = firstp; /* top of block */
o = 0; /* count of instructions */
for(p = firstp; p != P; p = p1) {
p1 = p->link;
o++;
if(p->mark & NOSCHED){
/* schedule what came before, then step over the whole NOSCHED run */
if(q1 != p){
sched(q1, q);
}
for(; p != P; p = p->link){
if(!(p->mark & NOSCHED))
break;
q = p;
}
p1 = p;
q1 = p;
o = 0;
continue;
}
if(p->mark & (LABEL|SYNC)) {
/* p starts a new block: schedule the one that ended at q */
if(q1 != p)
sched(q1, q);
q1 = p;
o = 1;
}
if(p->mark & (BRANCH|SYNC)) {
/* p ends the current block and is scheduled as part of it */
sched(q1, p);
q1 = p1;
o = 0;
}
if(o >= NSCHED) {
sched(q1, p);
q1 = p1;
o = 0;
}
q = p;
}
if (Mips24k)
triplestorenops();
}
/*
 * allocate a new instruction with prg(), make it an ANOR with
 * REGZERO as both source and destination, give it p's line,
 * and link it into the list immediately after p.
 */
void
addnop(Prog *p)
{
	Prog *filler;

	filler = prg();
	filler->line = p->line;
	filler->as = ANOR;

	/* both operands are the zero register */
	filler->from.type = D_REG;
	filler->to.type = D_REG;
	filler->from.reg = REGZERO;
	filler->to.reg = REGZERO;

	/* splice in after p */
	filler->link = p->link;
	p->link = filler;
}
/*
 * zero p's optab field and the class field of both of its operands.
 * noops() calls this on an ARET before rewriting it.
 */
void
nocache(Prog *p)
{
	p->from.class = 0;
	p->to.class = 0;
	p->optab = 0;
}
|