/*
* load kernel into memory
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "ureg.h"
#include "pool.h"
#include "../port/netif.h"
#include "../ip/ip.h"
#include "pxe.h"
#include <a.out.h>
#include "/sys/src/libmach/elf.h"
/*
 * Drop any earlier KADDR/PADDR definitions and use simple mask forms
 * here: KADDR ors KZERO into an address, PADDR clears the KSEGM bits.
 */
#undef KADDR
#undef PADDR
#define KADDR(a) ((void*)((ulong)(a) | KZERO))
#define PADDR(a) ((ulong)(a) & ~KSEGM)
extern int debug;
extern void pagingoff(ulong);
/* the four bytes compared against the start of an image to spot ELF */
static uchar elfident[] = {
'\177', 'E', 'L', 'F',
};
/* ehdr: 32-bit ELF header being loaded; rehdr: copy taken by readehdr before swapping */
static Ehdr ehdr, rehdr;
/* 64-bit ELF header being loaded */
static E64hdr e64hdr;
/* program header tables; point into the buffers allocated by readehdr/reade64hdr */
static Phdr *phdr;
static P64hdr *p64hdr;
/* index of the program header currently being processed */
static int curphdr;
/* file offset that the loader has accounted for so far */
static ulong curoff;
/* running byte count of ELF segments loaded and zeroed, printed at the end */
static ulong elftotal;
/* byte-order converters chosen by readehdr/reade64hdr from the ELF DATA byte */
static uvlong (*swav)(uvlong);
static long (*swal)(long);
static ushort (*swab)(ushort);
/*
 * Read the two bytes of s in memory order as a big-endian short.
 */
ushort
beswab(ushort s)
{
	uchar *b;
	int hi, lo;

	b = (uchar*)&s;
	hi = b[0];
	lo = b[1];
	return hi<<8 | lo;
}
/*
* big-endian long
*/
long
beswal(long l)
{
uchar *p;
p = (uchar*)&l;
return (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
}
/*
 * Read the eight bytes of v in memory order as a big-endian vlong.
 */
uvlong
beswav(uvlong v)
{
	uchar *src;
	uvlong r;
	int i;

	src = (uchar*)&v;
	r = 0;
	/* first byte in memory ends up most significant */
	for(i = 0; i < 8; i++)
		r = r<<8 | src[i];
	return r;
}
/*
 * Read the two bytes of s in memory order as a little-endian short.
 */
ushort
leswab(ushort s)
{
	uchar *b;
	int hi, lo;

	b = (uchar*)&s;
	lo = b[0];
	hi = b[1];
	return hi<<8 | lo;
}
/*
* little-endian long
*/
long
leswal(long l)
{
uchar *p;
p = (uchar*)&l;
return (p[3]<<24) | (p[2]<<16) | (p[1]<<8) | p[0];
}
/*
 * Read the eight bytes of v in memory order as a little-endian vlong.
 */
uvlong
leswav(uvlong v)
{
	uchar *src;
	uvlong r;
	int i;

	src = (uchar*)&v;
	r = 0;
	/* last byte in memory ends up most significant */
	for(i = 7; i >= 0; i--)
		r = r<<8 | src[i];
	return r;
}
/*
 * Convert header to canonical form:
 * apply swap in place to each of the n longs starting at lp.
 */
static void
hswal(long *lp, int n, long (*swap) (long))
{
	long *w;

	for(w = lp; n != 0; n--, w++)
		*w = (*swap)(*w);
}
/*
 * Apply swap in place to each of the n vlongs starting at lp.
 */
static void
hswav(uvlong *lp, int n, uvlong (*swap)(uvlong))
{
	uvlong *w;

	for(w = lp; n != 0; n--, w++)
		*w = (*swap)(*w);
}
/*
 * Validate and byte-swap the 32-bit ELF header held in ehdr, then
 * point b at a fresh buffer that will receive everything from the
 * end of the ELF header through the last program header
 * (state READPHDR).
 *
 * Returns 1 on success, 0 if the header cannot be used.
 */
static int
readehdr(Boot *b)
{
	int i;

	/* byte-swap the header according to the DATA format */
	if(ehdr.ident[CLASS] != ELFCLASS32) {
		print("bad ELF class - not 32 bit\n");
		return 0;
	}
	if(ehdr.ident[DATA] == ELFDATA2LSB) {
		swab = leswab;
		swal = leswal;
	} else if(ehdr.ident[DATA] == ELFDATA2MSB) {
		swab = beswab;
		swal = beswal;
	} else {
		print("bad ELF encoding - not big or little endian\n");
		return 0;
	}

	memmove(&rehdr, &ehdr, sizeof(Ehdr));	/* copy; never used */

	ehdr.type = swab(ehdr.type);
	ehdr.machine = swab(ehdr.machine);
	ehdr.version = swal(ehdr.version);
	ehdr.elfentry = swal(ehdr.elfentry);
	ehdr.phoff = swal(ehdr.phoff);
	ehdr.shoff = swal(ehdr.shoff);
	ehdr.flags = swal(ehdr.flags);
	ehdr.ehsize = swab(ehdr.ehsize);
	ehdr.phentsize = swab(ehdr.phentsize);
	ehdr.phnum = swab(ehdr.phnum);
	ehdr.shentsize = swab(ehdr.shentsize);
	ehdr.shnum = swab(ehdr.shnum);
	ehdr.shstrndx = swab(ehdr.shstrndx);

	if(ehdr.type != EXEC || ehdr.version != CURRENT)
		return 0;
	if(ehdr.phentsize != sizeof(Phdr))
		return 0;
	/*
	 * The program headers are located relative to the end of the
	 * ELF header we have already consumed.  A table that claims to
	 * start inside the header would make the size below wrap and
	 * leave phdr pointing before the buffer, so reject it.
	 */
	if(ehdr.phoff < sizeof(Ehdr)) {
		print("bad ELF program header offset\n");
		return 0;
	}

	if(debug)
		print("readehdr OK entry %#lux\n", ehdr.elfentry);

	curoff = sizeof(Ehdr);
	/* bytes from the end of the ELF header to the end of the phdr table */
	i = ehdr.phoff+ehdr.phentsize*ehdr.phnum - curoff;
	b->state = READPHDR;
	b->bp = (char*)smalloc(i);
	b->wp = b->bp;
	b->ep = b->wp + i;
	elftotal = 0;
	phdr = (Phdr*)(b->bp + ehdr.phoff-sizeof(Ehdr));
	if(debug)
		print("phdr...");

	return 1;
}
/*
 * Validate and byte-swap the 64-bit ELF header held in e64hdr, then
 * point b at a fresh buffer that will receive everything from the
 * end of the ELF header through the last program header
 * (state READ64PHDR).
 *
 * Returns 1 on success, 0 if the header cannot be used.
 */
static int
reade64hdr(Boot *b)
{
	int i;

	/* byte-swap the header according to the DATA format */
	if(e64hdr.ident[CLASS] != ELFCLASS64) {
		print("bad ELF class - not 64 bit\n");
		return 0;
	}
	if(e64hdr.ident[DATA] == ELFDATA2LSB) {
		swab = leswab;
		swal = leswal;
		swav = leswav;
	} else if(e64hdr.ident[DATA] == ELFDATA2MSB) {
		swab = beswab;
		swal = beswal;
		swav = beswav;
	} else {
		print("bad ELF encoding - not big or little endian\n");
		return 0;
	}

	e64hdr.type = swab(e64hdr.type);
	e64hdr.machine = swab(e64hdr.machine);
	e64hdr.version = swal(e64hdr.version);
	e64hdr.elfentry = swav(e64hdr.elfentry);
	e64hdr.phoff = swav(e64hdr.phoff);
	e64hdr.shoff = swav(e64hdr.shoff);
	e64hdr.flags = swal(e64hdr.flags);
	e64hdr.ehsize = swab(e64hdr.ehsize);
	e64hdr.phentsize = swab(e64hdr.phentsize);
	e64hdr.phnum = swab(e64hdr.phnum);
	e64hdr.shentsize = swab(e64hdr.shentsize);
	e64hdr.shnum = swab(e64hdr.shnum);
	e64hdr.shstrndx = swab(e64hdr.shstrndx);

	if(e64hdr.type != EXEC || e64hdr.version != CURRENT)
		return 0;
	if(e64hdr.phentsize != sizeof(P64hdr))
		return 0;
	/*
	 * The program headers are located relative to the end of the
	 * ELF header we have already consumed.  A table that claims to
	 * start inside the header would make the size below wrap and
	 * leave p64hdr pointing before the buffer, so reject it.
	 */
	if(e64hdr.phoff < sizeof(E64hdr)) {
		print("bad ELF program header offset\n");
		return 0;
	}

	if(debug)
		print("reade64hdr OK entry %#llux\n", e64hdr.elfentry);

	curoff = sizeof(E64hdr);
	/* bytes from the end of the ELF header to the end of the phdr table */
	i = e64hdr.phoff + e64hdr.phentsize*e64hdr.phnum - curoff;
	b->state = READ64PHDR;
	b->bp = (char*)smalloc(i);
	b->wp = b->bp;
	b->ep = b->wp + i;
	elftotal = 0;
	p64hdr = (P64hdr*)(b->bp + e64hdr.phoff-sizeof(E64hdr));
	if(debug)
		print("p64hdr...");

	return 1;
}
/*
 * Arrange for the next LOAD segment (starting the search at curphdr)
 * to be read.  Depending on where the segment sits in the file this
 * either sets up a scratch buffer to swallow the bytes between
 * curoff and the segment (READEPAD) or aims b directly at the
 * segment's load address (READEDATA).  When no program headers are
 * left, the state becomes TRYEBOOT.
 *
 * Returns 1 if b was set up, 0 if the image can't be loaded.
 */
static int
nextphdr(Boot *b)
{
	Phdr *seg;
	ulong off, gap;
	char *dst;

	if(debug)
		print("readedata %d\n", curphdr);

	/* skip entries that are not loadable */
	while(curphdr < ehdr.phnum && phdr[curphdr].type != LOAD)
		curphdr++;

	if(curphdr >= ehdr.phnum){
		/* ran off the end of the table */
		if(curphdr == 0)
			return 0;
		print("=%lud\n", elftotal);
		b->state = TRYEBOOT;
		b->entry = ehdr.elfentry;
		// PLLONG(b->hdr.entry, b->entry);
		return 1;
	}

	seg = &phdr[curphdr];
	off = seg->offset;
	if(off < curoff){
		/*
		 * Can't (be bothered to) rewind the
		 * input, it might be from tftp. If we
		 * did then we could boot FreeBSD kernels
		 * too maybe.
		 */
		return 0;
	}

	if(off > curoff){
		/* read and discard the bytes in front of the segment */
		gap = off - curoff;
		b->state = READEPAD;
		b->bp = (char*)smalloc(gap);
		b->wp = b->bp;
		b->ep = b->wp + gap;
		if(debug)
			print("nextphdr %lud...\n", gap);
		return 1;
	}

	/* segment data is next in the input: load it in place */
	dst = (char*)KADDR(PADDR(seg->paddr));
	b->state = READEDATA;
	b->bp = dst;
	b->wp = b->bp;
	b->ep = b->wp+seg->filesz;
	print("%ud+", seg->filesz);
	elftotal += seg->filesz;
	if(debug)
		print("nextphdr %ud@%#p\n", seg->filesz, dst);
	return 1;
}
/*
 * 64-bit twin of nextphdr: arrange for the next LOAD segment
 * (starting the search at curphdr) to be read, either via a scratch
 * pad buffer (READE64PAD) or straight into its load address
 * (READE64DATA).  When no program headers are left, the state
 * becomes TRYE64BOOT.
 *
 * Returns 1 if b was set up, 0 if the image can't be loaded.
 */
static int
nextp64hdr(Boot *b)
{
	P64hdr *seg;
	uvlong off, gap;
	char *dst;

	if(debug)
		print("reade64data %d\n", curphdr);

	/* skip entries that are not loadable */
	while(curphdr < e64hdr.phnum && p64hdr[curphdr].type != LOAD)
		curphdr++;

	if(curphdr >= e64hdr.phnum){
		/* ran off the end of the table */
		if(curphdr == 0)
			return 0;
		print("=%lud\n", elftotal);
		b->state = TRYE64BOOT;
		b->entry = e64hdr.elfentry;
		return 1;
	}

	seg = &p64hdr[curphdr];
	off = seg->offset;
	if(off < curoff){
		/*
		 * Can't (be bothered to) rewind the
		 * input, it might be from tftp. If we
		 * did then we could boot FreeBSD kernels
		 * too maybe.
		 */
		return 0;
	}

	if(off > curoff){
		/* read and discard the bytes in front of the segment */
		gap = off - curoff;
		b->state = READE64PAD;
		b->bp = (char*)smalloc(gap);
		b->wp = b->bp;
		b->ep = b->wp + gap;
		if(debug)
			print("nextp64hdr %llud...\n", gap);
		return 1;
	}

	/* segment data is next in the input: load it in place */
	dst = (char*)KADDR(PADDR(seg->paddr));
	b->state = READE64DATA;
	b->bp = dst;
	b->wp = b->bp;
	b->ep = b->wp+seg->filesz;
	print("%llud+", seg->filesz);
	elftotal += seg->filesz;
	if(debug)
		print("nextp64hdr %llud@%#p\n", seg->filesz, dst);
	return 1;
}
/*
 * The padding in front of segment curphdr has been read:
 * move curoff up to the segment's file offset and carry on.
 */
static int
readepad(Boot *b)
{
	if(debug)
		print("readepad %d\n", curphdr);
	curoff = phdr[curphdr].offset;
	return nextphdr(b);
}
/*
 * The padding in front of 64-bit segment curphdr has been read:
 * move curoff up to the segment's file offset and carry on.
 */
static int
reade64pad(Boot *b)
{
	if(debug)
		print("reade64pad %d\n", curphdr);
	curoff = p64hdr[curphdr].offset;
	return nextp64hdr(b);
}
/*
 * Segment curphdr's file contents are now in memory.  Zero whatever
 * part of its memory image extends past the file contents, account
 * for the bytes consumed, and move on to the next program header.
 */
static int
readedata(Boot *b)
{
	Phdr *seg;
	ulong nzero;

	seg = &phdr[curphdr];
	if(debug)
		print("readedata %d\n", curphdr);
	if(seg->filesz < seg->memsz){
		/* memsz beyond filesz is zero-filled */
		nzero = seg->memsz-seg->filesz;
		print("%lud", nzero);
		elftotal += nzero;
		memset((char*)KADDR(PADDR(seg->paddr)+seg->filesz), 0, nzero);
	}
	curoff = seg->offset+seg->filesz;
	curphdr++;

	return nextphdr(b);
}
/*
 * 64-bit segment curphdr's file contents are now in memory.  Zero
 * whatever part of its memory image extends past the file contents,
 * account for the bytes consumed, and move on to the next program
 * header.
 */
static int
reade64data(Boot *b)
{
	P64hdr *seg;
	uvlong nzero;

	seg = &p64hdr[curphdr];
	if(debug)
		print("reade64data %d\n", curphdr);
	if(seg->filesz < seg->memsz){
		/* memsz beyond filesz is zero-filled */
		nzero = seg->memsz - seg->filesz;
		print("%llud", nzero);
		elftotal += nzero;
		memset((char*)KADDR(PADDR(seg->paddr) + seg->filesz), 0, nzero);
	}
	curoff = seg->offset + seg->filesz;
	curphdr++;

	return nextp64hdr(b);
}
/*
 * The 32-bit program header table has been read: byte-swap it in
 * place, record that the input is now positioned just past the
 * table, and start on the first program header.
 */
static int
readphdr(Boot *b)
{
	int tblsize;

	tblsize = ehdr.phentsize*ehdr.phnum;
	/* every field of a Phdr is swapped as a long */
	hswal((long*)phdr, tblsize/sizeof(long), swal);
	if(debug)
		print("phdr curoff %lud vaddr %#lux paddr %#lux\n",
			curoff, phdr->vaddr, phdr->paddr);

	curoff = ehdr.phoff+tblsize;
	curphdr = 0;

	return nextphdr(b);
}
/*
 * The 64-bit program header table has been read: byte-swap each
 * entry in place, record that the input is now positioned just past
 * the table, and start on the first program header.
 */
static int
readp64hdr(Boot *b)
{
	int k;
	P64hdr *ent;

	for(k = 0; k < e64hdr.phnum; k++){
		ent = &p64hdr[k];
		/* two longs first, then six vlongs beginning at offset */
		hswal((long*)ent, 2, swal);
		hswav((uvlong*)&ent->offset, 6, swav);
	}
	if(debug)
		print("p64hdr curoff %lud vaddr %#llux paddr %#llux\n",
			curoff, p64hdr->vaddr, p64hdr->paddr);

	curoff = e64hdr.phoff + e64hdr.phentsize*e64hdr.phnum;
	curphdr = 0;

	return nextp64hdr(b);
}
/*
 * Move as many bytes as will fit from the source window
 * [*sbuf, esbuf) into the destination window [*dbuf, edbuf),
 * advancing both cursors past what was copied.
 *
 * Returns 0 if the destination is (or has just become) full,
 * -1 if there was room but the source was empty, otherwise the
 * number of bytes of room still left in the destination.
 */
static int
addbytes(char **dbuf, char *edbuf, char **sbuf, char *esbuf)
{
	int room, avail, n;

	room = edbuf - *dbuf;
	if(room <= 0)
		return 0;	/* dest buffer is full */

	avail = esbuf - *sbuf;
	n = room < avail? room: avail;
	if(n <= 0)
		return -1;	/* src buffer is empty */

	memmove(*dbuf, *sbuf, n);
	*sbuf += n;
	*dbuf += n;
	return room - n;
}
/*
 * Quiesce the machine before control is handed to a new kernel.
 * The steps run in a fixed order: wait, splhi(), detach the buffered
 * serial console, shut down devices, then arch->introff().
 */
void
impulse(void)
{
delay(500); /* drain uart */
splhi();
/* turn off buffered serial console */
serialoq = nil;
/* shutdown devices */
chandevshutdown();
arch->introff();
}
void
prstackuse(int)
{
char *top, *base;
ulong *p;
base = up->kstack;
top = up->kstack + KSTACK - (sizeof(Sargs) + BY2WD);
for (p = (ulong *)base; (char *)p < top && *p ==
(Stkpat<<24 | Stkpat<<16 | Stkpat<<8 | Stkpat); p++)
;
print("proc stack: used %ld bytes, %ld left (stack pattern)\n",
top - (char *)p, (char *)p - base);
}
/*
 * this code is simplified from reboot(). It doesn't need to relocate
 * the new kernel nor deal with other processors.
 *
 * entry is the address of the new kernel's entry point; it is
 * reduced with PADDR before being handed to pagingoff().
 */
void
warp9(ulong entry)
{
// prstackuse(0); /* debugging */
/* multiboot info is built first, while the system is still fully up */
mkmultiboot();
impulse();
/* get out of KZERO space, turn off paging and jump to entry */
pagingoff(PADDR(entry));
}
/*
 * Mark the boot attempt described by b as failed.
 * Always returns FAIL, so callers can write `return bootfail(b);'.
 */
static int
bootfail(Boot *b)
{
b->state = FAILED;
return FAIL;
}
/*
 * Does p begin with the three bytes 0x1F 0x8B 0x08?
 * Returns non-zero if so.
 */
static int
isgzipped(uchar *p)
{
	if(p[0] != 0x1F)
		return 0;
	if(p[1] != 0x8B)
		return 0;
	return p[2] == 0x08;
}
/*
 * The first sizeof(Exechdr) bytes of the image are in b->hdr.
 * Work out what kind of image it is (Plan 9 a.out, 32- or 64-bit
 * ELF, or gzip) and aim b at the buffer the next stage should fill.
 *
 * Returns MORE on success; on an unrecognised format marks b as
 * failed and returns FAIL.
 */
static int
readexec(Boot *b)
{
	Exechdr *hdr;
	ulong pentry, text, data, magic, size;
	char *dest, *tag;
	int iself;

	hdr = &b->hdr;
	magic = GLLONG(hdr->magic);
	if(magic == I_MAGIC || magic == S_MAGIC) {
		/* plan 9 a.out: text is read straight to its load address */
		pentry = PADDR(GLLONG(hdr->entry));
		text = GLLONG(hdr->text);
		data = GLLONG(hdr->data);
		if (pentry < MB)
			panic("kernel entry %#p below 1 MB", pentry);
		if (PGROUND(pentry + text) + data > MB + Kernelmax)
			panic("kernel larger than %d bytes", Kernelmax);
		b->state = READ9TEXT;
		b->bp = (char*)KADDR(pentry);
		b->wp = b->bp;
		b->ep = b->wp+text;

		if(magic == I_MAGIC){
			/* the uvl bytes read along with the header go at the start of the text */
			memmove(b->bp, b->hdr.uvl, sizeof(b->hdr.uvl));
			b->wp += sizeof(b->hdr.uvl);
		}

		print("%lud", text);
		return MORE;
	}

	/*
	 * The remaining formats all continue reading into some other
	 * buffer, which must start with the bytes already in b->hdr.
	 */
	iself = memcmp(b->bp, elfident, 4) == 0;
	if(iself && (uchar)b->bp[4] == ELFCLASS32){
		b->state = READEHDR;
		dest = (char*)&ehdr;
		size = sizeof(Ehdr);
		tag = "elf...";
	} else if(iself && (uchar)b->bp[4] == ELFCLASS64){
		b->state = READE64HDR;
		dest = (char*)&e64hdr;
		size = sizeof(E64hdr);
		tag = "elf64...";
	} else if(isgzipped((uchar *)b->bp)) {
		b->state = READGZIP;
		/* could use Unzipbuf instead of smalloc() */
		dest = (char*)smalloc(Kernelmax);
		size = Kernelmax;
		tag = "gz...";
	} else {
		print("bad kernel format (magic %#lux)\n", magic);
		return bootfail(b);
	}

	b->bp = dest;
	b->wp = dest;
	b->ep = dest + size;
	memmove(b->bp, &b->hdr, sizeof(Exechdr));
	b->wp += sizeof(Exechdr);
	print("%s", tag);
	return MORE;
}
/*
 * Start a loaded Plan 9 a.out kernel.  For I_MAGIC the kernel is
 * entered via warp9 at entry; for S_MAGIC it is entered via warp64
 * at the big-endian vlong stored in b->hdr.uvl[0].  Any other magic
 * is reported and the function returns.
 */
static void
boot9(Boot *b, ulong magic, ulong entry)
{
	if(magic != I_MAGIC && magic != S_MAGIC){
		print("bad magic %#lux\n", magic);
		return;
	}
	if(magic == I_MAGIC){
		print("entry: %#lux\n", entry);
		warp9(PADDR(entry));
		return;
	}
	warp64(beswav(b->hdr.uvl[0]));
}
/*
 * Unpack and boot a gzipped Plan 9 kernel held entirely in
 * [b->bp, b->wp).  The image is decompressed twice: once into
 * b->hdr just to learn the segment sizes, then again to the
 * kernel's load address.
 *
 * only returns upon failure
 */
static void
readgzip(Boot *b)
{
ulong entry, text, data, bss, magic, all, pentry;
uchar *sdata;
Exechdr *hdr;
/* the whole gzipped kernel is now at b->bp */
hdr = &b->hdr;
if(!isgzipped((uchar *)b->bp)) {
print("lost magic\n");
return;
}
print("%ld => ", b->wp - b->bp);
/* just fill hdr from gzipped b->bp, to get various sizes */
/*
 * NOTE(review): whether a negative error return from gunzip() is
 * caught here depends on the signedness of the sizeof expression
 * in this compiler — confirm.
 */
if(gunzip((uchar*)hdr, sizeof *hdr, (uchar*)b->bp, b->wp - b->bp)
< sizeof *hdr) {
print("error uncompressing kernel exec header\n");
return;
}
/* assume uncompressed kernel is a plan 9 boot image */
magic = GLLONG(hdr->magic);
entry = GLLONG(hdr->entry);
text = GLLONG(hdr->text);
data = GLLONG(hdr->data);
bss = GLLONG(hdr->bss);
print("%lud+%lud+%lud=%lud\n", text, data, bss, text+data+bss);
pentry = PADDR(entry);
if (pentry < MB)
panic("kernel entry %#p below 1 MB", pentry);
if (PGROUND(pentry + text) + data > MB + Kernelmax)
panic("kernel larger than %d bytes", Kernelmax);
/* fill entry from gzipped b->bp */
/* the Exec header is unpacked too, so output starts sizeof(Exec) below entry */
all = sizeof(Exec) + text + data;
/*
 * NOTE(review): `all' is ulong, so if gunzip() returns a signed
 * negative value on error the comparison below is presumably done
 * unsigned and would not detect it — confirm gunzip's return
 * convention.
 */
if(gunzip((uchar *)KADDR(PADDR(entry)) - sizeof(Exec), all,
(uchar*)b->bp, b->wp - b->bp) < all) {
print("error uncompressing kernel\n");
return;
}
/* relocate data to start at page boundary */
sdata = KADDR(PADDR(entry+text));
memmove((void*)PGROUND((uintptr)sdata), sdata, data);
boot9(b, magic, entry);
}
/*
 * if nbuf is zero, boot.
 * else add nbuf bytes from vbuf to b->wp (if there is room)
 * and advance the state machine, which may reset b's pointers
 * and return to the top.
 *
 * Returns MORE when further input is wanted, ENOUGH when the image
 * is completely loaded (or being collected for gunzip) and FAIL once
 * the boot attempt has failed.  With nbuf zero the function only
 * returns (FAIL) if booting did not succeed.
 *
 * Every ELF stage reports failure by returning 0 without touching
 * b's pointers; the stage must then be turned into FAILED here,
 * otherwise b->wp == b->ep still holds and the loop below would
 * re-enter the same state forever.
 */
int
bootpass(Boot *b, void *vbuf, int nbuf)
{
	char *buf, *ebuf;
	Exechdr *hdr;
	ulong entry, bss;
	uvlong entry64;

	if(b->state == FAILED)
		return FAIL;

	if(nbuf == 0)
		goto Endofinput;

	buf = vbuf;
	ebuf = buf+nbuf;
	/* possibly copy into b->wp from buf (not first time) */
	while(addbytes(&b->wp, b->ep, &buf, ebuf) == 0) {
		/* b->bp is full, so advance the state machine */
		switch(b->state) {
		case INITKERNEL:
			b->state = READEXEC;
			b->bp = (char*)&b->hdr;
			b->wp = b->bp;
			b->ep = b->bp+sizeof(Exechdr);
			break;
		case READEXEC:
			/* marks b FAILED itself on an unknown format */
			readexec(b);
			break;

		case READ9TEXT:
			hdr = &b->hdr;
			b->state = READ9DATA;
			/* data starts on the page boundary after the text */
			b->bp = (char*)PGROUND((uintptr)
				KADDR(PADDR(GLLONG(hdr->entry))) +
				GLLONG(hdr->text));
			b->wp = b->bp;
			b->ep = b->wp + GLLONG(hdr->data);
			print("+%ld", GLLONG(hdr->data));
			break;

		case READ9DATA:
			hdr = &b->hdr;
			bss = GLLONG(hdr->bss);
			memset(b->ep, 0, bss);
			print("+%ld=%ld\n",
				bss, GLLONG(hdr->text)+GLLONG(hdr->data)+bss);
			b->state = TRYBOOT;
			return ENOUGH;

		/*
		 * elf
		 */
		case READEHDR:
			if(!readehdr(b)){
				print("readehdr failed\n");
				bootfail(b);
			}
			break;
		case READPHDR:
			if(!readphdr(b)){
				print("readphdr failed\n");
				bootfail(b);
			}
			break;
		case READEPAD:
			if(!readepad(b)){
				print("readepad failed\n");
				bootfail(b);
			}
			break;
		case READEDATA:
			if(!readedata(b)){
				print("readedata failed\n");
				bootfail(b);
			}else if(b->state == TRYEBOOT)
				return ENOUGH;
			break;

		/*
		 * elf64
		 */
		case READE64HDR:
			if(!reade64hdr(b)){
				print("reade64hdr failed\n");
				bootfail(b);
			}
			break;
		case READ64PHDR:
			if(!readp64hdr(b)){
				print("readp64hdr failed\n");
				bootfail(b);
			}
			break;
		case READE64PAD:
			if(!reade64pad(b)){
				print("reade64pad failed\n");
				bootfail(b);
			}
			break;
		case READE64DATA:
			if(!reade64data(b)){
				print("reade64data failed\n");
				bootfail(b);
			}else if(b->state == TRYE64BOOT)
				return ENOUGH;
			break;

		case TRYBOOT:
		case TRYEBOOT:
		case TRYE64BOOT:
		case READGZIP:
			return ENOUGH;

		case READ9LOAD:
		case INIT9LOAD:
			panic("9load");

		default:
			panic("bootstate");
		}
		if(b->state == FAILED)
			return FAIL;
	}
	return MORE;

Endofinput:
	/* end of input */
	switch(b->state) {
	case INITKERNEL:
	case READEXEC:
	case READ9TEXT:
	case READ9DATA:
	case READEHDR:
	case READPHDR:
	case READEPAD:
	case READEDATA:
	case READE64HDR:
	case READ64PHDR:
	case READE64PAD:
	case READE64DATA:
		print("premature EOF\n");
		break;

	case TRYBOOT:
		boot9(b, GLLONG(b->hdr.magic), GLLONG(b->hdr.entry));
		break;

	case TRYEBOOT:
		entry = b->entry;
		if(ehdr.machine == I386){
			print("entry: %#lux\n", entry);
			warp9(PADDR(entry));
		}
		else if(ehdr.machine == AMD64)
			warp64(entry);
		else
			panic("elf boot: ehdr.machine %d unknown", ehdr.machine);
		break;

	case TRYE64BOOT:
		entry64 = b->entry;
		if(e64hdr.machine == I386){
			print("entry: %#llux\n", entry64);
			warp9(PADDR(entry64));
		}
		else if(e64hdr.machine == AMD64)
			warp64(entry64);
		else
			panic("elf64 boot: e64hdr.machine %d unknown",
				e64hdr.machine);
		break;

	case READGZIP:
		readgzip(b);
		break;

	case INIT9LOAD:
	case READ9LOAD:
		panic("end 9load");

	default:
		panic("bootdone");
	}
	/* reaching here means the boot attempt came back or never started */
	return bootfail(b);
}
|