/*
* nvidia tegra 2 architecture-specific stuff
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include "io.h"
#include "arm.h"
#include "../port/netif.h"
#include "etherif.h"
#include "../port/flashif.h"
#include "../port/usb.h"
#include "../port/portusbehci.h"
#include "usbehci.h"
enum {
	/* non-zero enables l1diag and the extra boot chatter in cpustart */
	Debug		= 0,

	/* hardware limits imposed by register contents or layouts */
	Maxflowcpus	= 2,	/* Flow has halt/control registers for cpu0 & cpu1 only */
	Maxcpus		= 4,	/* bound asserted before shifting per-cpu bits by cpu number */
};
/*
 * short names for the register overlays and diagnostic state declared
 * below: Clkrst, Power, Scu and Flow are laid over the addresses in soc;
 * Diag is the shared state used by l1diag.
 */
typedef struct Clkrst Clkrst;
typedef struct Diag Diag;
typedef struct Flow Flow;
typedef struct Scu Scu;
typedef struct Power Power;
/*
 * clock & reset controller registers, overlaid on soc.clkrst.
 * member order defines the register layout; the _pad members exist only
 * to place the next named register at the byte offset written in their
 * array bounds (this assumes ulong is 4 bytes -- TODO confirm in u.h).
 * every pad has a distinct name: two members may not share a name.
 */
struct Clkrst {
	ulong	rstsrc;
	ulong	rstdevl;
	ulong	rstdevh;
	ulong	rstdevu;

	ulong	clkoutl;
	ulong	clkouth;
	ulong	clkoutu;

	uchar	_pad0[0x24-0x1c];
	ulong	supcclkdiv;		/* 0x24: super cclk divider */
	ulong	_pad1;
	ulong	supsclkdiv;		/* super sclk divider */

	uchar	_pad4[0x4c-0x30];
	ulong	clkcpu;			/* 0x4c: holds the Cpu?stop bits */

	uchar	_pad3[0xe0-0x50];
	ulong	pllxbase;		/* 0xe0: pllx controls CPU clock speed */
	ulong	pllxmisc;
	ulong	pllebase;		/* plle is dedicated to pcie */
	ulong	pllemisc;

	uchar	_pad2[0x340-0xf0];
	ulong	cpuset;			/* 0x340: written with reset bits by stopcpu */
	ulong	cpuclr;			/* written with reset bits by unfreeze */
};
enum {
	/* rstsrc bits */
	Wdcpurst	= 1<<0,
	Wdcoprst	= 1<<1,
	Wdsysrst	= 1<<2,
	Wdsel		= 1<<4,		/* tmr1 or tmr2? */
	Wdena		= 1<<5,

	/* devl bits */
	Sysreset	= 1<<2,

	/*
	 * clkcpu bits.  callers form the bit for cpu n as Cpu0stop << n,
	 * so each cpu1 constant is its cpu0 sibling shifted left once.
	 */
	Cpu0stop	= 1<<8,
	Cpu1stop	= Cpu0stop<<1,

	/* cpuset & cpuclr bits, also shifted by cpu number */
	Cpu0reset	= 1<<0,
	Cpu1reset	= Cpu0reset<<1,
	Cpu0dereset	= 1<<4,
	Cpu1dereset	= Cpu0dereset<<1,
	Cpu0wdreset	= 1<<8,
	Cpu1wdreset	= Cpu0wdreset<<1,
	Cpu0dbgreset	= 1<<12,
	Cpu1dbgreset	= Cpu0dbgreset<<1,
};
/*
 * power-management registers, overlaid on soc.power (see cpustart).
 * member order defines the register layout, so members must not be
 * reordered, inserted or removed.
 */
struct Power {
ulong ctl; /* mainly for rtc clock signals */
ulong secregdis;
ulong swrst;
ulong wakevmask;
ulong waklvl;
ulong waksts;
ulong swwaksts;
ulong dpdpadsovr; /* deep power down pads override */
ulong dpdsample;
ulong dpden;
ulong gatetimroff;
ulong gatetimron;
ulong toggle;
ulong unclamp;
ulong gatests; /* ro; cpustart asserts this equals MASK(7) */
ulong goodtmr;
ulong blinktmr;
ulong noiopwr;
ulong detect;
ulong detlatch;
ulong scratch[24];
ulong secscratch[6];
ulong cpupwrgoodtmr;
ulong cpupwrofftmr;
ulong pgmask[2];
ulong autowaklvl;
ulong autowaklvlmask;
ulong wakdelay;
ulong detval;
ulong ddr;
ulong usbdebdel; /* usb de-bounce delay */
ulong usbao;
ulong cryptoop;
ulong pllpwb0ovr;
/* 19 more words; the bound suggests scratch registers 24 through 42 */
ulong scratch24[42-24+1];
ulong boundoutmirr[3];
ulong sys33ven;
ulong boundoutmirracc;
ulong gate;
};
enum {
	/* partition ids */
	Partpcie	= 3,
	Partl2		= 4,

	/* toggle bits */
	Start		= 0x100,	/* bit 8 */
};
/*
 * snoop control unit registers, overlaid on soc.scu.
 * member order defines the register layout; the _pad arrays place the
 * following register at the byte offset written in their bounds.
 */
struct Scu {
ulong ctl; /* see the ctl bits enum (Scuenable etc.) */
ulong cfg; /* ro */
ulong cpupwrsts;
ulong inval; /* scuon writes MASK(16) here before enabling the scu */
uchar _pad0[0x40-0x10];
ulong filtstart; /* at 0x40 */
ulong filtend;
uchar _pad1[0x50-0x48];
ulong accctl; /* at 0x50; initially 0 */
ulong nsaccctl;
};
enum {
	/* ctl bits, one per bit from bit 0 upward */
	Scuenable	= 0x01,
	Filter		= 0x02,
	Scuparity	= 0x04,
	Specfill	= 0x08,		/* only for PL310 */
	Allport0	= 0x10,
	Standby		= 0x20,
	Icstandby	= 0x40,
};
/*
 * flow controller registers, overlaid on soc.flow.
 * member order defines the register layout.  there are halt and control
 * registers for cpu0, cpu1 and the coprocessor only, which is why
 * callers assert cpu < Maxflowcpus before touching them.
 */
struct Flow {
ulong haltcpu0;
ulong haltcop; /* stopcpu writes Stop here to halt the avp coprocessor */
ulong cpu0;
ulong cop;
ulong xrq;
ulong haltcpu1;
ulong cpu1;
};
enum {
	/* cpu* bits */
	Cpuenable	= 1<<0,
	Eventenable	= 1<<1,
	Waitwfebitsshift = 4,
	Waitwfebitsmask	= MASK(2),
	Event		= 1<<14,	/* w1c */

	/* haltcpu* bits (stopcpu writes this to haltcop too) */
	Stop		= 2<<29,
};
/*
 * state shared by all cpus during the l1diag cache-coherency test.
 * NOTE(review): c0 and c1 presumably pad the live fields onto cache
 * lines of their own -- confirm against the definition of Cacheline.
 */
struct Diag {
Cacheline c0;
Lock; /* unnamed member: a Diag* is passed straight to ilock/iunlock */
long cnt; /* every cpu increments then decrements it in pass1; must end at 0 */
long sync; /* arrival counter used by synccpus */
Cacheline c1;
};
/* defined outside this file; cpustart passes its address to memdiag */
extern ulong testmem;
/*
* number of cpus available. contrast with conf.nmach, which is number
* of running cpus.
* zero until getncpus first runs and fills it in from the scu's cfg
* register (clamped to MAXMACH and optionally lowered by *ncpu).
*/
int navailcpus;
/*
 * secondary cpus poll l1ptstable.word in cpustart and only call mmuinit
 * once it is non-zero (or after a 5 second timeout).
 * NOTE(review): it is set outside this file -- confirm the writer.
 */
Isolated l1ptstable;
/*
 * addresses of the soc's devices and controllers.  code in this file
 * casts these to the register overlays above (e.g. (Clkrst *)soc.clkrst).
 * most are plain constants; ehci, ide and mmc go through P2VAHB().
 */
Soc soc = {
.clkrst = 0x60006000, /* clock & reset signals */
.power = 0x7000e400,
.exceptvec = PHYSEVP, /* undocumented magic */
.sema = 0x60001000,
.l2cache= PHYSL2BAG, /* pl310 bag on the side */
.flow = 0x60007000,
/* 4 non-gic controllers */
// .intr = { 0x60004000, 0x60004100, 0x60004200, 0x60004300, },
/* private memory region */
.scu = 0x50040000,
/* we got this address from the `cortex-a series programmer's guide'. */
.intr = 0x50040100, /* per-cpu interface */
.glbtmr = 0x50040200,
.loctmr = 0x50040600,
.intrdist=0x50041000,
.uart = { 0x70006000, 0x70006040,
0x70006200, 0x70006300, 0x70006400, },
.rtc = 0x7000e000,
.tmr = { 0x60005000, 0x60005008, 0x60005050, 0x60005058, },
.µs = 0x60005010,
.pci = 0x80000000,
.ether = 0xa0024000,
.nand = 0x70008000,
.nor = 0x70009000, /* also VIRTNOR */
.ehci = P2VAHB(0xc5000000), /* 1st of 3 */
.ide = P2VAHB(0xc3000000),
.gpio = { 0x6000d000, 0x6000d080, 0x6000d100, 0x6000d180,
0x6000d200, 0x6000d280, 0x6000d300, },
.spi = { 0x7000d400, 0x7000d600, 0x7000d800, 0x7000da00, },
.twsi = 0x7000c000,
.mmc = { P2VAHB(0xc8000000), P2VAHB(0xc8000200),
P2VAHB(0xc8000400), P2VAHB(0xc8000600), },
};
/* the single Diag instance that every cpu contends on in l1diag */
static volatile Diag diag;
/* count of devices that missing() found did not respond to probeaddr */
static int missed;
/*
 * print the pllx and plle base & misc registers and the super cclk and
 * sclk dividers on the console.  purely diagnostic: the clock & reset
 * controller is only read, never written.
 */
void
dumpcpuclks(void)
{
	Clkrst *cr;

	cr = (Clkrst *)soc.clkrst;
	iprint("pllx base %#lux misc %#lux\n", cr->pllxbase, cr->pllxmisc);
	iprint("plle base %#lux misc %#lux\n", cr->pllebase, cr->pllemisc);
	iprint("super cclk divider %#lux\n", cr->supcclkdiv);
	iprint("super sclk divider %#lux\n", cr->supsclkdiv);
}
/*
 * return the cpu family name.  the id argument is ignored: the answer
 * is always the same constant string, which callers must not modify.
 */
static char *
devidstr(ulong)
{
	char *family;

	family = "ARM Cortex-A9";
	return family;
}
/*
 * intentionally empty.
 * NOTE(review): presumably a link hook named in the kernel configuration
 * so that this file is pulled into the build -- confirm.
 */
void
archtegralink(void)
{
}
/*
 * format this cpu's main id register into buf as
 * "Cortex-A9 r<major>p<minor>", writing at most size bytes, and
 * return buf.  asserts that the register's top byte is 'A'.
 */
char *
cputype2name(char *buf, int size)
{
	ulong id, major, minor;

	id = cpidget();			/* main id register */
	assert((id >> 24) == 'A');
	major = (id >> 20) & MASK(4);
	minor = id & MASK(4);
	seprint(buf, buf + size, "Cortex-A9 r%ldp%ld", major, minor);
	return buf;
}
/*
 * apply cortex-a9 errata workarounds by setting bits in the
 * `diagnostic register', chosen by the cpu's rNpM revision as read
 * from the main id register.  asserts that the id's top byte is 'A'.
 */
static void
errata(void)
{
	ulong id, major, minor, dreg;

	id = cpidget();			/* main id register */
	assert((id >> 24) == 'A');
	major = (id >> 20) & MASK(4);	/* the N of rNpM */
	minor = id & MASK(4);		/* the M of rNpM */

	/* this is an undocumented `diagnostic register' that linux knows */
	dreg = cprdsc(0, CpDTLB, 0, 1);

	/* erratum 742230: anything up to and including r2p2 */
	if (major < 2 || (major == 2 && minor <= 2))
		dreg |= 1<<4;

	/* errata 743622 and 742231 (two bits): r2p0 through r2p2 */
	if (major == 2 && minor <= 2)
		dreg |= 1<<6 | 1<<12 | 1<<22;

	/* erratum 751472: anything before r3 */
	if (major < 3)
		dreg |= 1<<11;

	cpwrsc(0, CpDTLB, 0, 1, dreg);
}
void
archconfinit(void)
{
char *p;
ulong hz;
assert(m != nil);
m->cpuhz = 1000 * Mhz; /* trimslice speed */
p = getconf("*cpumhz");
if (p) {
hz = atoi(p) * Mhz;
if (hz >= 100*Mhz && hz <= 3600UL*Mhz)
m->cpuhz = hz;
}
m->delayloop = m->cpuhz/2000; /* initial estimate */
errata();
}
/*
 * describe ethernet controller number ctlrno in *ether.
 * only controller 0 exists: it is filled in as a gigabit rtl8169 on
 * the pci-e interrupt and 1 is returned.  any other number returns -1
 * and leaves *ether untouched.
 */
int
archether(unsigned ctlrno, Ether *ether)
{
	if (ctlrno != 0)
		return -1;

	ether->type = "rtl8169";	/* pci-e ether */
	ether->ctlrno = ctlrno;
	ether->irq = Pcieirq;		/* non-msi pci-e intr */
	ether->nopt = 0;
	ether->mbps = 1000;
	return 1;
}
void
dumpscustate(void)
{
Scu *scu = (Scu *)soc.scu;
print("cpu%d scu: accctl %#lux\n", m->machno, scu->accctl);
print("cpu%d scu: smp cpu bit map %#lo for %ld cpus; ", m->machno,
(scu->cfg >> 4) & MASK(4), (scu->cfg & MASK(2)) + 1);
print("cpus' power %#lux\n", scu->cpupwrsts);
}
/*
 * enable the snoop control unit unless it is already enabled.
 * MASK(16) is first written to the inval register, then parity,
 * speculative fill and the enable bit are set together in ctl.
 */
void
scuon(void)
{
	Scu *scu;

	scu = (Scu *)soc.scu;
	if ((scu->ctl & Scuenable) == 0) {
		scu->inval = MASK(16);
		coherence();
		scu->ctl = Scuparity | Scuenable | Specfill;
		coherence();
	}
}
int
getncpus(void)
{
int n;
char *p;
Scu *scu;
if (navailcpus == 0) {
scu = (Scu *)soc.scu;
navailcpus = (scu->cfg & MASK(2)) + 1;
if (navailcpus > MAXMACH)
navailcpus = MAXMACH;
p = getconf("*ncpu");
if (p && *p) {
n = atoi(p);
if (n > 0 && n < navailcpus)
navailcpus = n;
}
}
return navailcpus;
}
/*
 * print one line identifying this cpu: its number, clock rate in MHz,
 * model and revision, and the byte order taken from the psr.
 */
void
cpuidprint(void)
{
	char model[64], *order;

	cputype2name(model, sizeof model);
	delay(50);				/* let uart catch up */
	if (getpsr() & PsrBigend)
		order = "big";
	else
		order = "little";
	iprint("cpu%d: %lldMHz ARM %s %s-endian\n",
		m->machno, m->cpuhz / Mhz, model, order);
}
/*
 * bring every device out of reset, enable every clock output, then
 * set the watchdog reset bits in rstsrc.  each step is
 * followed by coherence() before the next begins.
 */
static void
clockson(void)
{
Clkrst *clk = (Clkrst *)soc.clkrst;
/* enable all by clearing resets */
clk->rstdevl = clk->rstdevh = clk->rstdevu = 0;
coherence();
clk->clkoutl = clk->clkouth = clk->clkoutu = ~0; /* enable all clocks */
coherence();
/* Wdsel is left clear; see the `tmr1 or tmr2?' note on its definition */
clk->rstsrc = Wdcpurst | Wdcoprst | Wdsysrst | Wdena;
coherence();
}
/*
 * stop cpu number `cpu' and hold it in reset; refuses to stop cpu0.
 * we could be shutting down ourself (if cpu == m->machno), so take care.
 * the sequence is: take the cpu out of scheduling, record it in
 * active.stopped, write back the l1 cache, halt the avp coprocessor,
 * halt the cpu via the flow controller, then assert its reset bits.
 */
void
stopcpu(uint cpu)
{
Flow *flow = (Flow *)soc.flow;
Clkrst *clk = (Clkrst *)soc.clkrst;
if (cpu == 0) {
iprint("stopcpu: may not stop cpu0\n");
return;
}
machoff(cpu);
lock(&active);
active.stopped |= 1 << cpu;
unlock(&active);
l1cache->wb();
/* shut down arm7 avp coproc so it can't cause mischief. */
/* could try watchdog without stopping avp. */
flow->haltcop = Stop;
coherence();
flow->cop = 0; /* no Cpuenable */
coherence();
delay(10);
assert(cpu < Maxflowcpus);
/*
 * cpu 0 was rejected above and the assert limits cpu to 1, so these
 * two conditionals always pick the cpu1 registers.
 */
*(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = Stop;
coherence();
*(cpu == 0? &flow->cpu0: &flow->cpu1) = 0; /* no Cpuenable */
coherence();
delay(10);
/* cold reset */
assert(cpu < Maxcpus);
/* the cpu0 bits shifted left by cpu select that cpu's bits */
clk->cpuset = (Cpu0reset | Cpu0dbgreset | Cpu0dereset) << cpu;
coherence();
delay(1);
l1cache->wb();
}
/*
 * barrier: add ourselves to *cntp, then spin until the count reaches
 * at least n.
 */
static void
synccpus(volatile long *cntp, int n)
{
	ainc(cntp);
	for (;;)
		if (*cntp >= n)
			break;		/* all cpus should now be here */
}
/*
 * one pass of the l1 coherency test.  each cpu increments and then
 * decrements dp->cnt 999,999 times; once all cpus have finished, the
 * count must be back at zero or we panic.  cpu0 prints the pass number
 * first.  on return each cpu has undone its two increments of dp->sync.
 */
static void
pass1(int pass, volatile Diag *dp)
{
	int left;

	if (m->machno == 0)
		iprint(" %d", pass);

	left = 1000*1000 - 1;
	while (left-- > 0) {
		ainc(&dp->cnt);
		adec(&dp->cnt);
	}

	/* first barrier: everybody has finished hammering cnt */
	synccpus(&dp->sync, navailcpus);

	ilock(dp);
	if (dp->cnt != 0)
		panic("cpu%d: diag: failed w count %ld", m->machno, dp->cnt);
	iunlock(dp);

	/* second barrier: everybody has checked cnt */
	synccpus(&dp->sync, 2 * navailcpus);

	/* undo our two arrivals */
	adec(&dp->sync);
	adec(&dp->sync);
}
/*
* try to confirm coherence of l1 caches.
* assume that all available cpus will be started.
* does nothing unless Debug is non-zero.  every cpu must call this:
* the synccpus barriers wait for navailcpus arrivals and would spin
* forever otherwise.  panics if the shared counters end up wrong.
*/
void
l1diag(void)
{
int pass;
volatile Diag *dp;
if (!Debug)
return;
l1cache->wb();
/*
* synchronise and print
*/
dp = &diag;
ilock(dp);
if (m->machno == 0)
iprint("l1: waiting for %d cpus... ", navailcpus);
iunlock(dp);
synccpus(&dp->sync, navailcpus);
ilock(dp);
if (m->machno == 0)
iprint("cache coherency pass");
iunlock(dp);
synccpus(&dp->sync, 2 * navailcpus);
/* each cpu arrived twice above, so each removes itself twice */
adec(&dp->sync);
adec(&dp->sync);
/*
* cpus contend
*/
for (pass = 0; pass < 3; pass++)
pass1(pass, dp);
/*
* synchronise and check sanity
*/
synccpus(&dp->sync, navailcpus);
if(dp->sync < navailcpus || dp->sync >= 2 * navailcpus)
panic("cpu%d: diag: failed w dp->sync %ld", m->machno,
dp->sync);
if(dp->cnt != 0)
panic("cpu%d: diag: failed w dp->cnt %ld", m->machno,
dp->cnt);
ilock(dp);
iprint(" cpu%d ok", m->machno);
iunlock(dp);
synccpus(&dp->sync, 2 * navailcpus);
adec(&dp->sync);
adec(&dp->sync);
l1cache->wb();
/*
* all done, print
*/
ilock(dp);
if (m->machno == 0)
iprint("\n");
iunlock(dp);
}
/*
 * let cpu number `cpu' run: clear its stop bit in clkcpu, release its
 * resets via cpuclr, then zero its flow-controller control and halt
 * registers.  coherence() separates the steps.
 */
static void
unfreeze(uint cpu)
{
Clkrst *clk = (Clkrst *)soc.clkrst;
Flow *flow = (Flow *)soc.flow;
assert(cpu < Maxcpus);
/* the cpu0 bits shifted left by cpu select that cpu's bits */
clk->clkcpu &= ~(Cpu0stop << cpu);
coherence();
/* out of reset */
clk->cpuclr = (Cpu0reset | Cpu0wdreset | Cpu0dbgreset | Cpu0dereset) <<
cpu;
coherence();
/* the flow controller only has registers for cpu0 and cpu1 */
assert(cpu < Maxflowcpus);
*(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;
coherence();
*(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = 0; /* normal operat'n */
coherence();
}
/*
* this is all a bit magic. the soc.exceptvec register is effectively
* undocumented. we had to look at linux and experiment, alas. this is the
* sort of thing that should be standardised as part of the cortex mpcore spec.
* even intel document their equivalent procedure.
*
* start cpu number `cpu' executing at _vrst.  returns 0 on success and
* -1 if there is only one cpu, if cpu is the caller or is out of range,
* or if the cpu fails to report in within about 2 seconds (in which
* case it is stopped again).
*/
int
startcpu(uint cpu)
{
int i, r;
ulong oldvec, rstaddr;
ulong *evp = (ulong *)soc.exceptvec; /* magic */
r = 0;
/* getncpus is called first so that navailcpus is valid below */
if (getncpus() < 2 || cpu == m->machno ||
cpu >= MAXMACH || cpu >= navailcpus)
return -1;
oldvec = *evp;
l1cache->wb(); /* start next cpu w same view of ram */
*evp = rstaddr = PADDR(_vrst); /* will start cpu executing at _vrst */
coherence();
l1cache->wb();
unfreeze(cpu);
/* cpustart overwrites *evp with the new cpu's machno once it is up */
for (i = 2000; i > 0 && *evp == rstaddr; i--)
delay(1);
if (i <= 0 || *evp != cpu) {
iprint("cpu%d: didn't start!\n", cpu);
stopcpu(cpu); /* make sure it's stopped */
r = -1;
}
/* put the original vector back whether or not the cpu started */
*evp = oldvec;
return r;
}
/*
 * panic if the low bit of the scr is set (we're running non-secure);
 * otherwise report the debug enable register if it is non-zero.
 */
static void
cksecure(void)
{
	ulong dbg;
	extern ulong getdebug(void);

	if ((getscr() & 1) != 0)
		panic("cpu%d: running non-secure", m->machno);

	dbg = getdebug();
	if (dbg != 0)
		iprint("cpu%d: debug enable reg %#lux\n", m->machno, dbg);
}
/*
 * cortex-a9 model-specific configuration: set the smp and maintenance
 * broadcast bits in the auxiliary control register.
 * returns the register's value from before the change.
 */
ulong
smpon(void)
{
	ulong old;

	old = getauxctl();
	putauxctl(old | CpACsmp | CpACmaintbcast);
	return old;
}
void
cortexa9cachecfg(void)
{
/* cortex-a9 model-specific configuration */
putauxctl(getauxctl() | CpACparity | CpAClwr0line | CpACl2pref);
}
/*
* called on a cpu other than 0 from cpureset in l.s,
* from _vrst in lexception.s.
* mmu and l1 (and system-wide l2) caches and coherency (smpon) are on,
* but interrupts are disabled.
* our mmu is using an exact copy of cpu0's l1 page table
* as it was after userinit ran.
*
* never returns: it ends by calling schedinit, and panics if that
* comes back.
*/
void
cpustart(void)
{
int ms;
ulong *evp;
Power *pwr;
up = nil;
/* our bit is already in active.machs: we must have been reset while running */
if (active.machs & (1<<m->machno)) {
serialputc('?');
serialputc('r');
panic("cpu%d: resetting after start", m->machno);
}
assert(m->machno != 0);
errata();
cortexa9cachecfg();
memdiag(&testmem);
machinit(); /* bumps nmach, adds bit to machs */
machoff(m->machno); /* not ready to go yet */
/* clock signals and scu are system-wide and already on */
clockshutdown(); /* kill any watch-dog timer */
trapinit();
clockinit(); /* sets loop delay */
timersinit();
cpuidprint();
/*
* notify cpu0 that we're up so it can proceed to l1diag.
* startcpu is polling this word and expects to find our machno in it.
*/
evp = (ulong *)soc.exceptvec; /* magic */
*evp = m->machno;
coherence();
l1diag(); /* contend with other cpus to verify sanity */
/*
* pwr->noiopwr == 0
* pwr->detect == 0x1ff (default, all disabled)
*/
pwr = (Power *)soc.power;
assert(pwr->gatests == MASK(7)); /* everything has power */
/*
* 8169 has to initialise before we get past this, thus cpu0
* has to schedule processes first.
* wait up to 5 seconds, 10ms at a time, invalidating our cached
* copy of the flag before each look at it.
*/
if (Debug)
iprint("cpu%d: waiting for 8169\n", m->machno);
for (ms = 0; !l1ptstable.word && ms < 5000; ms += 10) {
delay(10);
cachedinvse(&l1ptstable.word, sizeof l1ptstable.word);
}
if (!l1ptstable.word)
iprint("cpu%d: 8169 unreasonably slow; proceeding\n", m->machno);
/* now safe to copy cpu0's l1 pt in mmuinit */
mmuinit(); /* update our l1 pt from cpu0's */
fpon();
machon(m->machno); /* now ready to go and be scheduled */
if (Debug)
iprint("cpu%d: scheding\n", m->machno);
schedinit();
panic("cpu%d: schedinit returned", m->machno);
}
/*
 * interrupt handler for the per-cpu sgi; mainly used to break out of
 * wfi.  announces the interrupt and, on any cpu but cpu0, calls
 * clockprod to try to prod a stuck cpu into life.
 */
void
sgintr(Ureg *ureg, void *)
{
	iprint("cpu%d: got sgi\n", m->machno);
	if (m->machno == 0)
		return;
	/* try to prod cpu1 into life when it gets stuck */
	clockprod(ureg);
}
/*
 * one-time architecture initialisation; calls after the first return
 * immediately.  sets provisional clock values, turns on clocks, runs
 * archconfinit, enables floating point and installs sgintr for the
 * Cpu0irq and Cpu1irq interrupts.
 */
void
archreset(void)
{
static int beenhere;
if (beenhere)
return;
beenhere = 1;
/* conservative temporary values until archconfinit runs */
m->cpuhz = 1000 * Mhz; /* trimslice speed */
m->delayloop = m->cpuhz/2000; /* initial estimate */
prcachecfg();
clockson();
/* all partitions were powered up by u-boot, so needn't do anything */
archconfinit();
// resetusb();
fpon();
if (irqtooearly)
panic("archreset: too early for irqenable");
irqenable(Cpu0irq, sgintr, nil, "cpu0");
irqenable(Cpu1irq, sgintr, nil, "cpu1");
/* ... */
}
/*
 * reset the whole system by setting Sysreset in the clock & reset
 * controller.  must be called on cpu0.  if we are somehow still
 * running half a second later, raise the processor priority and print
 * a dot every second, forever.
 */
void
archreboot(void)
{
	Clkrst *cr;

	cr = (Clkrst *)soc.clkrst;
	assert(m->machno == 0);
	iprint("archreboot: reset!\n");
	delay(20);

	cr->rstdevl |= Sysreset;
	coherence();
	delay(500);

	/* shouldn't get here */
	splhi();
	iprint("awaiting reset");
	while (1) {
		delay(1000);
		print(".");
	}
}
/* intentionally empty: this file does no keyboard initialisation */
void
kbdinit(void)
{
}
/*
 * probe the device called name at addr and complain if it is absent.
 * a zero addr is reported on its own line and not counted.  each
 * device that fails probeaddr bumps `missed' and is added to a single
 * comma-separated "missing:" list on the console.
 */
static void
missing(ulong addr, char *name)
{
	static int announced;		/* has "missing:" been printed yet? */

	if (addr == 0) {
		iprint("address zero for %s\n", name);
		return;
	}
	if (probeaddr(addr) < 0) {
		missed++;
		if (announced)
			iprint(",\n\t");
		else {
			iprint("missing:");
			announced = 1;
		}
		iprint(" %s at %#lux", name, addr);
	}
}
/*
 * verify that all the necessary device registers are accessible.
 * each address is handed to missing(), which reports the ones that do
 * not respond; a newline ends the report if anything was missed.
 */
void
chkmissing(void)
{
delay(10);
missing(KZERO, "dram");
missing(soc.intr, "intr ctlr");
missing(soc.intrdist, "intr distrib");
missing(soc.tmr[0], "tegra timer1");
missing(soc.uart[0], "console uart");
missing(soc.pci, "pcie");
missing(soc.ether, "ether8169");
missing(soc.µs, "µs counter");
if (missed)
iprint("\n");
delay(10);
}
/* flash write-protect hook: intentionally empty, both arguments are ignored */
void
archflashwp(Flash*, int)
{
}
/*
* for ../port/devflash.c:/^flashreset
* retrieve flash type, virtual base and length and return 0;
* return -1 on error (no flash)
*
* only bank 0 is recognised.  this is still a stub: for bank 0 it
* calls panic before filling in *f.
*/
int
archflashreset(int bank, Flash *f)
{
if(bank != 0)
return -1;
panic("archflashreset: rewrite for nor & nand flash on ts");
/*
* this is set up for the igepv2 board.
* NOTE(review): presumably never reached, assuming panic does not
* return; kept as a template for the rewrite the panic asks for.
*/
f->type = "onenand";
f->addr = (void*)VIRTNOR; /* mapped here by archreset */
f->size = 0; /* done by probe */
f->width = 1;
f->interleave = 0;
return 0;
}
|