/*
* Copyright (c) 2013, Coraid, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Coraid nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL CORAID BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <u.h>
#include <libc.h>
#include <thread.h>
#include <fcall.h>
#include <9p.h>
#include "dat.h"
/* Number of GMeta structures that fit in one block. */
enum {
GMperBlk = BlkSize / sizeof(GMeta),
};
static void freeblob(uvlong);
/* Event counters, reported by prmstat(). */
static uvlong nget;
static uvlong nrm;
static uvlong nset;
static uvlong nalloc;
static uvlong nfree;
static uvlong nmiss;
/* mlock is held around every access to mbuf/mblk (resetmeta, getmstruct, savemstruct). */
static QLock mlock;
/* alock is held by allocmeta and freemeta while they update super.ffmeta. */
static QLock alock;
/* block is held by allocblob and freeblob while they update the blob free list. */
static QLock block;
/* The metadata block most recently obtained with cbread (nil if none), and its block number. */
static GMeta *mbuf;
static uvlong mblk;
/*
 * Write a freshly initialized metadata structure pool and string/blob
 * pool to fd, 16 blocks at a time.  The regions are described by
 * super.firstmeta/super.nmeta and super.firstblob/super.nblob.
 * Return values of pwrite are not checked.
 */
void
reammeta(int fd)
{
GMeta *mp;
Blob *bp;
char *p, *e;
int i, j, n;
/* First the GMeta structures */
p = θmalloc(16 * BlkSize);
e = p + 16 * BlkSize;
/* Structure 0 is a placeholder named "invalid"; list walkers in this file treat index 0 as end-of-list. */
mp = (GMeta *)p;
mp->next = 0;
mp->type = MTistring;
strcpy(mp->name, "invalid");
strcpy(mp->m.str, "errnt");
/* Chain the rest of the first 16 blocks: structure k gets next = k + 1. */
for(++mp, j = 2; (char *)mp < e; ++mp, ++j)
mp->next = j;
/* NOTE(review): this first write is always 16 blocks, whatever super.nmeta is. */
pwrite(fd, p, 16 * BlkSize, super.firstmeta * BlkSize);
/* Clear the "invalid" placeholder so later chunks start with an ordinary free structure. */
memset(p, 0, sizeof(GMeta));
for(i = 16; i < super.nmeta; i += 16) {
n = super.nmeta - i;
if(n > 16)
n = 16;
/* j keeps counting from the previous chunk, so the chain continues across chunks. */
for(mp = (GMeta *)p; (char *)mp < e; ++mp, ++j)
mp->next = j;
if((i/16) % 10 == 0) fprint(2, ",");
pwrite(fd, p, n * BlkSize, (super.firstmeta + i) * BlkSize);
}
/*
 * NOTE(review): the last structure written gets next equal to the total
 * number of structures rather than 0, so the chain is not terminated
 * inside the pool -- confirm whether that is intended.
 */
/* Now the string/blob pool */
for(i = 0; i < super.nblob; i += 16) {
n = super.nblob - i;
if(n > 16)
n = 16;
memset(p, 0, n * BlkSize);
/*
 * Each block becomes two half-block blobs with the 0x8000 flag set in len.
 * The first half links to the second half; the second half links to the
 * start of the following block, except in the last block where next
 * stays 0 from the memset.
 */
for(j = 0; j < n; ++j) {
bp = (Blob *)(p + j * BlkSize);
bp->len = 0x8000 | (BlkSize/2 - sizeof(short));
bp->next = (super.firstblob + i + j) * BlkSize + BlkSize/2;
bp = (Blob *)(p + j * BlkSize + BlkSize/2);
bp->len = 0x8000 | (BlkSize/2 - sizeof(short));
if(i + j + 1 < super.nblob)
bp->next = (super.firstblob + i + j + 1) * BlkSize;
}
if((i/16) % 10 == 0) fprint(2, ";");
pwrite(fd, p, n * BlkSize, (super.firstblob + i) * BlkSize);
}
free(p);
}
/*
 * Drop the cached metadata block, if there is one, releasing it with
 * brelease and forgetting its block number.
 */
void
resetmeta(void)
{
	qlock(&mlock);
	if(mbuf == nil) {
		qunlock(&mlock);
		return;
	}
	brelease(mblk);
	mblk = 0;
	mbuf = nil;
	qunlock(&mlock);
}
/*
 * Copy metadata structure number idx into buf.
 *
 * If fd is not -1 the structure is read directly from fd with spread;
 * otherwise it is copied out of the one-block cache (mbuf/mblk), which
 * is refilled with cbread when idx lives in a different block.
 *
 * Returns sizeof(GMeta) on a cached read, the result of spread on a
 * direct read, and -1 if idx is out of range or the block cannot be read.
 */
static int
getmstruct(int fd, GMeta *buf, uvlong idx)
{
	uvlong off, blk;

	/* Valid indices are 0 .. nmeta*GMperBlk - 1; the count itself is already past the pool. */
	if(idx >= super.nmeta * (BlkSize / sizeof(GMeta))) {
		fprint(2, "Invalid metadata index: %ulld\n", idx);
		return -1;
	}
	if(fd != -1)
		return spread(fd, buf, sizeof(GMeta), idx * sizeof(GMeta) + super.firstmeta * BlkSize);
	blk = idx / GMperBlk + super.firstmeta;
	off = idx % GMperBlk;
	qlock(&mlock);
	if(mbuf == nil || blk != mblk) {
		++nmiss;
		if(mbuf)
			brelease(mblk);
		mbuf = cbread(blk);
		if(mbuf == nil) {
			/* Other callers in this file treat a nil cbread result as a read error. */
			mblk = 0;
			qunlock(&mlock);
			return -1;
		}
		mblk = blk;
	}
	memmove(buf, mbuf + off, sizeof(GMeta));
	qunlock(&mlock);
	return sizeof(GMeta);
}
/*
 * Store buf as metadata structure number idx: bring its block into the
 * one-block cache if necessary, copy the structure in, and write the
 * block with cbwrite.
 *
 * Returns sizeof(GMeta), or -1 if idx is out of range or the block
 * cannot be read.
 */
static int
savemstruct(GMeta *buf, uvlong idx)
{
	uvlong off, blk;

	/* Valid indices are 0 .. nmeta*GMperBlk - 1; the count itself is already past the pool. */
	if(idx >= super.nmeta * (BlkSize / sizeof(GMeta))) {
		fprint(2, "Invalid metadata index: %ulld\n", idx);
		return -1;
	}
	blk = idx / GMperBlk + super.firstmeta;
	off = idx % GMperBlk;
	qlock(&mlock);
	if(mbuf == nil || blk != mblk) {
		++nmiss;
		if(mbuf)
			brelease(mblk);
		mbuf = cbread(blk);
		if(mbuf == nil) {
			/* Other callers in this file treat a nil cbread result as a read error. */
			mblk = 0;
			qunlock(&mlock);
			return -1;
		}
		mblk = blk;
	}
	memmove(mbuf + off, buf, sizeof(GMeta));
	cbwrite(blk);
	qunlock(&mlock);
	return sizeof(GMeta);
}
/*
 * Take the first structure off the metadata free list (super.ffmeta).
 * Its current contents are left in buf.  Returns the index of the
 * structure, or 0 if the free list is empty or the structure cannot
 * be read.
 */
static uvlong
allocmeta(GMeta *buf)
{
	uvlong got;

	got = 0;
	qlock(&alock);
	if(super.ffmeta == 0)
		fprint(2, "Out of metadata space!\n");
	else if(getmstruct(-1, buf, super.ffmeta) >= 0) {
		++nalloc;
		got = super.ffmeta;
		/* the free list now starts at whatever followed the one we took */
		super.ffmeta = buf->next;
		savesuper();
	}
	qunlock(&alock);
	return got;
}
/*
 * Return metadata structure idx to the free list.  If it refers to a
 * string or blob in the pool (MTstring/MTblob) that blob is freed too.
 * The structure is zeroed and pushed on the front of super.ffmeta.
 */
static void
freemeta(uvlong idx)
{
	GMeta buf;

	qlock(&alock);
	if(getmstruct(-1, &buf, idx) < 0) {
		/* must release, not re-acquire: a second qlock here would block forever */
		qunlock(&alock);
		return;
	}
	if(buf.type == MTstring || buf.type == MTblob)
		freeblob(buf.m.val);
	++nfree;
	memset(&buf, 0, sizeof(GMeta));
	buf.next = super.ffmeta;
	super.ffmeta = idx;
	savesuper();
	savemstruct(&buf, idx);
	qunlock(&alock);
}
/*
static void
insmeta(GMeta *buf, uvlong idx, uvlong after)
{
GMeta abuf;
if(getmstruct(-1, &abuf, after) < 0)
return;
buf->next = abuf.next;
abuf.next = idx;
savemstruct(&abuf, after);
savemstruct(buf, idx);
}
*/
/*
 * Return a freshly allocated copy of the blob record at byte address bp
 * (length field plus len & 0x7fff following bytes), or nil if bp does
 * not fall inside the blob pool.  With fd == -1 the record is taken from
 * the block cache; otherwise it is read from fd.  The caller frees the
 * result.
 */
static Blob *
getbstruct(int fd, uvlong bp)
{
	Blob *src;
	void *copy;
	uvlong blkno;
	ulong inblk, nbytes;
	int cached;

	blkno = bp / BlkSize;
	if(blkno < super.firstblob || blkno >= super.firstblob + super.nblob)
		return nil;
	inblk = bp % BlkSize;
	cached = (fd == -1);
	if(cached)
		src = (Blob *)((char *)cbread(blkno) + inblk);
	else {
		/* read up to the end of the block, but never more than 32768 bytes */
		nbytes = BlkSize - inblk;
		if(nbytes > 32768)
			nbytes = 32768;
		src = θmalloc(nbytes);
		spread(fd, src, nbytes, bp);
	}
	nbytes = src->len & 0x7fff;
	copy = θmalloc(nbytes + sizeof(short));
	memmove(copy, src, nbytes + sizeof(short));
	if(cached)
		brelease(blkno);
	else
		free(src);
	return copy;
}
/*
 * Write the blob record b to byte address bp and return the result of
 * cwrite.
 *
 * len (low 15 bits) counts the bytes that follow the length field, so
 * the record on disk is len + sizeof(short) bytes long; getbstruct
 * copies that many bytes and reammeta/allocblob size records the same
 * way.  Writing only len bytes would leave the last two bytes of every
 * record unsaved.
 */
static int
savebstruct(Blob *b, uvlong bp)
{
	return cwrite(b, (b->len & 0x7fff) + sizeof(short), bp);
}
/*
 * Allocate room for n bytes of data from the blob free list (first fit).
 *
 * The request is rounded up to a multiple of BlobQuan, including the
 * length field that precedes the data.  If the chosen free blob is at
 * least one quantum larger than needed it is split and the remainder
 * stays on the free list; otherwise the whole blob is unlinked.
 *
 * Returns an in-memory copy of the allocated record (caller frees) and
 * stores its byte address through bp if bp is non-nil.  The record is
 * not written to disk here.  Returns nil if nothing fits or the free
 * list cannot be read.
 */
static Blob *
allocblob(int n, uvlong *bp)
{
	Blob *cb, *pb, *nb;
	uvlong cur, prev;
	int an;

	/* a record of an bytes holds an - sizeof(short) data bytes, so count the length field */
	an = (n + (int)sizeof(short) + BlobQuan - 1) & ~(BlobQuan - 1);
	prev = 0;
	cb = nil;
	pb = nil;
	qlock(&block);
	for(cur = super.ffblob; cur; ) {
		cb = getbstruct(-1, cur);
		if(cb == nil) {
			qunlock(&block);
			free(pb);
			return nil;
		}
		if((cb->len & 0x7fff) >= an)
			break;
		/* only the immediate predecessor is needed later; drop the older copy */
		free(pb);
		pb = cb;
		prev = cur;
		cur = cb->next;
	}
	if(cur == 0) {
		qunlock(&block);
		/* cb is either nil (empty list) or the same copy as pb */
		free(cb);
		fprint(2, "No free blobs\n");
		return nil;
	}
	if((cb->len & 0x7fff) >= an + BlobQuan) {
		/* split: the tail starting an bytes in takes this blob's place on the free list */
		if(prev != 0) {
			pb->next = cur + an;
			savebstruct(pb, prev);
		}
		else {
			super.ffblob = cur + an;
			savesuper();
		}
		nb = (Blob *)((char *)cb + an);
		nb->len = cb->len - an;
		nb->next = cb->next;
		savebstruct(nb, cur + an);
		cb->len = an - sizeof(short);
	}
	else {
		/* close fit: unlink the whole blob and clear its 0x8000 flag */
		if(prev != 0) {
			pb->next = cb->next;
			savebstruct(pb, prev);
		}
		else {
			super.ffblob = cb->next;
			savesuper();
		}
		cb->len &= 0x7fff;
	}
	qunlock(&block);
	free(pb);
	if(bp)
		*bp = cur;
	return cb;
}
/*
* This is a pretty gross hack, and probably oversimplified.
* However, I'm not real happy with this part and may redo
* it anyway. When freeing a blob (or long string) in the pool,
* we don't attempt to coalesce them, we just add it on to
* the end of the list. Until we've done enough allocations
* to use up all the pool space once, we don't really care.
* Because the most common use for the pool space is strings
* and the quantum is set to 64, I suspect nearly all requests
* will be satisfied with a single quantum and coalescing
* wouldn't have significant benefit anyway. So there's
* the excuse for taking what is probably too simple an
* approach.
*/
/*
 * Return the blob at byte address bp to the pool: set its 0x8000 flag,
 * clear its next link, and append it after the current last free blob
 * (super.lfblob).  No coalescing is attempted.
 */
static void
freeblob(uvlong bp)
{
	Blob *b, *b2;

	qlock(&block);
	b = getbstruct(-1, bp);
	if(b == nil) {
		/* getbstruct returns nil for addresses outside the blob pool */
		qunlock(&block);
		fprint(2, "freeblob: bad blob address %ulld\n", bp);
		return;
	}
	b->len |= 0x8000;
	b->next = 0;
	savebstruct(b, bp);
	free(b);
	b2 = getbstruct(-1, super.lfblob);
	if(b2) {
		b2->next = bp;
		savebstruct(b2, super.lfblob);
		super.lfblob = bp;
		savesuper();
		free(b2);
	}
	else
		fprint(2, "Unexpected failure to read super.lfblob\n");
	qunlock(&block);
}
/*
 * Fetch the data of the blob at byte address bp.  Returns a newly
 * allocated copy of the data (caller frees) and, if n is non-nil,
 * stores the stored length through it.  Returns nil if bp is not a
 * valid pool address or the blob has its 0x8000 flag set.
 */
void *
getblob(int fd, uvlong bp, int *n)
{
	Blob *rec;
	void *data;

	rec = getbstruct(fd, bp);
	if(rec == nil)
		return nil;
	data = nil;
	if((rec->len & 0x8000) == 0) {
		if(n != nil)
			*n = rec->len;
		data = θmalloc(rec->len);
		memmove(data, rec->data, rec->len);
	}
	free(rec);
	return data;
}
/*
 * Store n bytes from blob in the pool.  If bp names an existing blob
 * whose stored length equals n, it is overwritten in place and bp is
 * returned.  Otherwise the old blob (if any) is flagged with 0x8000, a
 * new one is allocated, and the new byte address is returned.
 * Returns 0 if no space can be allocated.
 */
uvlong
setblob(void *blob, int n, uvlong bp)
{
	Blob *b;
	uvlong nbp;

	b = getbstruct(-1, bp);
	if(b) {
		if(b->len == n) {
			memmove(b->data, blob, n);
			savebstruct(b, bp);
			free(b);	/* getbstruct hands back an allocated copy */
			return bp;
		}
		b->len |= 0x8000;
		savebstruct(b, bp);
		free(b);
	}
	b = allocblob(n, &nbp);
	if(b == nil)
		return 0;
	memmove(b->data, blob, n);
	savebstruct(b, nbp);
	free(b);
	return nbp;
}
/*
 * Search the metadata list starting at stidx for an entry called name.
 * On success the entry's value is copied to *val and its type is
 * returned; otherwise (not found, or a structure could not be read)
 * MTnone is returned and *val is untouched.
 */
int
getmeta(int fd, uvlong stidx, char *name, MVal *val)
{
	GMeta cur;
	uvlong idx;

	++nget;
	for(idx = stidx; idx != 0; idx = cur.next) {
		if(getmstruct(fd, &cur, idx) < 0)
			return MTnone;
		if(strcmp(name, cur.name) == 0) {
			*val = cur.m;
			return cur.type;
		}
	}
	return MTnone;
}
/*
 * Look up the integer-valued entry name in the list starting at stidx.
 * Stores the value through val and returns MTint if the entry exists
 * and has type MTint; otherwise returns MTnone and leaves *val alone.
 */
int
getmetaint(int fd, uvlong stidx, char *name, uvlong *val)
{
	MVal found;

	if(getmeta(fd, stidx, name, &found) != MTint)
		return MTnone;
	*val = found.val;
	return MTint;
}
/*
 * Look up the string-valued entry name in the list starting at stidx.
 * An inline string (MTistring) is duplicated with estrdup9p; a pool
 * string (MTstring) is fetched with getblob.  Returns nil for any other
 * type or when the entry is missing.  The caller frees the result.
 */
char *
getmetastr(int fd, uvlong stidx, char *name)
{
	MVal found;
	int kind;

	kind = getmeta(fd, stidx, name, &found);
	if(kind == MTistring)
		return estrdup9p(found.str);
	if(kind == MTstring)
		return getblob(fd, found.val, nil);
	return nil;
}
/*
 * Set the entry called name in the metadata list starting at stidx.
 *
 * If the entry exists it is updated in place: for MTistring, val is
 * treated as a char* and copied into the structure (freeing a previous
 * pool string first); for every other type val is stored as is.  If
 * newname is non-nil the entry is renamed.
 *
 * If the entry does not exist a new structure is allocated, filled in
 * (named newname if given, else name) and linked after the last
 * structure of the list.
 *
 * Returns the index of the structure that now holds the entry, or 0 on
 * failure.
 */
uvlong
setmeta(uvlong stidx, char *name, char *newname, uvlong val, int type)
{
	GMeta cur, fresh;
	uvlong idx, tail, nidx;

	++nset;
	tail = 0;
	for(idx = stidx; idx != 0; idx = cur.next) {
		if(getmstruct(-1, &cur, idx) < 0)
			return 0;
		tail = idx;
		if(strcmp(name, cur.name) != 0)
			continue;
		if(type != MTistring)
			cur.m.val = val;
		else {
			if(cur.type == MTstring)
				freeblob(cur.m.val);
			strcpy(cur.m.str, (char *)val);
		}
		cur.type = type; /* in case we're changing the string length */
		if(newname != nil)
			strcpy(cur.name, newname);
		savemstruct(&cur, idx);
		return idx;
	}
	/* not present: append a new structure after the last one seen */
	nidx = allocmeta(&fresh);
	if(nidx == 0)
		return 0;
	if(tail != 0) {
		fresh.next = cur.next;
		cur.next = nidx;
		savemstruct(&cur, tail);
	}
	else
		fresh.next = 0;
	fresh.type = type;
	strcpy(fresh.name, newname != nil ? newname : name);
	if(type != MTistring)
		fresh.m.val = val;
	else
		strcpy(fresh.m.str, (char *)val);
	savemstruct(&fresh, nidx);
	return nidx;
}
/*
 * Set (or create) an integer entry; see setmeta for the meaning of the
 * arguments and the return value.
 */
uvlong
setmetaint(uvlong stidx, char *name, char *newname, uvlong val)
{
	uvlong where;

	where = setmeta(stidx, name, newname, val, MTint);
	return where;
}
/*
 * Set (or create) a string entry.  Strings of up to 7 characters are
 * stored inline in the structure (MTistring); longer ones are stored,
 * with their terminating NUL, in the blob pool via setblob (MTstring),
 * where bp is the address of the blob to reuse, if any.
 * Returns what setmeta returns.
 */
uvlong
setmetastr(uvlong stidx, char *name, char *newname, char *s, uvlong bp)
{
	uvlong where;
	int len;

	len = strlen(s);
	if(len > 7) {
		where = setblob(s, len + 1, bp);
		return setmeta(stidx, name, newname, where, MTstring);
	}
	return setmeta(stidx, name, newname, (uvlong)s, MTistring);
}
/*
 * Overwrite the link, name, type and integer value of metadata
 * structure idx.  Does nothing if the structure cannot be read.
 */
void
setmstruct(uvlong idx, uvlong next, char *name, int type, uvlong val)
{
	GMeta rec;

	if(getmstruct(-1, &rec, idx) >= 0) {
		rec.next = next;
		strcpy(rec.name, name);
		rec.type = type;
		rec.m.val = val;
		savemstruct(&rec, idx);
	}
}
/*
 * Set (or create) a blob entry: the n bytes at blob are stored in the
 * pool with setblob (bp is the address of the blob to reuse, if any)
 * and the resulting address is recorded with type MTblob.
 * Returns what setmeta returns.
 */
uvlong
setmetablob(uvlong stidx, char *name, char *newname, uchar *blob, int n, uvlong bp)
{
	uvlong where;

	where = setblob(blob, n, bp);
	return setmeta(stidx, name, newname, where, MTblob);
}
/*
 * Remove structure victim from the metadata list that starts at midx
 * and free it.  Returns the index of the (possibly new) first structure
 * of the list; midx is returned unchanged if victim is not on the list
 * or a structure cannot be read.
 */
uvlong
rmmeta(uvlong midx, uvlong victim)
{
	GMeta buf, vbuf;
	uvlong next;

	++nrm;
	if(getmstruct(-1, &vbuf, victim) < 0)
		return midx;
	if(midx == victim) {
		next = vbuf.next;
		freemeta(victim);
		return next;
	}
	for(next = midx; next; ) {
		if(getmstruct(-1, &buf, next) < 0)
			return midx;
		if(buf.next == victim) {
			/* splice the victim out by pointing its predecessor past it */
			buf.next = vbuf.next;
			freemeta(victim);
			savemstruct(&buf, next);
			return midx;
		}
		/* walk the list itself; following vbuf.next would skip straight past the victim */
		next = buf.next;
	}
	return midx;
}
/*
 * Free every structure on the metadata list starting at midx.  Stops
 * early if a structure cannot be read.
 */
void
rmmlist(uvlong midx)
{
	GMeta cur;
	uvlong idx;

	++nrm;
	/* cur is read before idx is freed, so its link is still the old one */
	for(idx = midx; idx != 0; idx = cur.next) {
		if(getmstruct(-1, &cur, idx) < 0)
			return;
		freemeta(idx);
	}
}
/*
 * Put a new, cleaned block in front of dblk: slot 0 of the new block
 * points at dblk, and the "dblock" entry of midx is renamed "index" and
 * set to the new block.  Returns the new block number, or 0 if no block
 * could be allocated.  The third argument is unused.
 */
static uvlong
promote1(uvlong midx, uvlong dblk, int)
{
	uvlong *slots;
	uvlong fresh;

	fresh = allocblock();
	if(fresh != 0) {
		slots = cbclean(fresh);
		slots[0] = dblk;
		cbwrite(fresh);
		setmetaint(midx, "dblock", "index", fresh);
		brelease(fresh);
	}
	return fresh;
}
/*
 * Put a new, cleaned block in front of iblk: slot 0 of the new block
 * points at iblk, and the "index" entry of midx is renamed "indirect"
 * and set to the new block.  Returns the new block number, or 0 if no
 * block could be allocated.  The third argument is unused.
 */
static uvlong
promote2(uvlong midx, uvlong iblk, int)
{
	uvlong *slots;
	uvlong fresh;

	fresh = allocblock();
	if(fresh != 0) {
		slots = cbclean(fresh);
		slots[0] = iblk;
		cbwrite(fresh);
		setmetaint(midx, "index", "indirect", fresh);
		brelease(fresh);
	}
	return fresh;
}
/*
 * Raise the indexing of midx to "dblindir".  When levels is 1 an extra
 * intermediate block is first placed in front of iblk.  A new top block
 * is then allocated with slot 0 pointing at the block below it, and the
 * existing entry ("index" when levels is 1, "indirect" otherwise) is
 * renamed "dblindir" and set to the new top block.
 * Returns the new top block number, or 0 if an allocation fails.
 */
static uvlong
promote3(uvlong midx, uvlong iblk, int levels)
{
	uvlong *slots;
	uvlong fresh;
	int fromindex;

	fromindex = (levels == 1);
	if(fromindex) {
		fresh = allocblock();
		if(fresh == 0)
			return 0;
		slots = cbclean(fresh);
		slots[0] = iblk;
		cbwrite(fresh);
		brelease(fresh);
		iblk = fresh;
	}
	fresh = allocblock();
	if(fresh == 0)
		return 0;
	slots = cbclean(fresh);
	slots[0] = iblk;
	cbwrite(fresh);
	setmetaint(midx, fromindex ? "index" : "indirect", "dblindir", fresh);
	brelease(fresh);
	return fresh;
}
/*
 * Follow slot off of index block iblk and return the block number
 * stored there.  With fd != -1 the slot is read directly from fd.
 * Otherwise the block cache is used, and if the slot is empty and
 * allocate is set a new cleaned block is allocated and recorded in
 * the slot.
 *
 * Returns 0 if iblk is outside the data area or a needed block could
 * not be allocated; an empty slot with allocate clear also yields 0.
 */
static uvlong
doindir(int fd, uvlong iblk, int off, int allocate)
{
	uvlong *p;
	uvlong pblk;

	if(iblk < super.firstdat || iblk >= super.nblk)
		return 0;
	if(fd != -1)
		spread(fd, &pblk, sizeof(uvlong), iblk * BlkSize + off * sizeof(uvlong));
	else {
		p = cbread(iblk);
		pblk = p[off];
		if(pblk == 0) {
			if(allocate) {
				pblk = allocblock();
				if(pblk == 0) {
					/* don't leave iblk held when giving up */
					brelease(iblk);
					return 0;
				}
				p[off] = pblk;
				cbwrite(iblk);
				cbclean(pblk);
				cbwrite(pblk);
				brelease(pblk);
			}
		}
		brelease(iblk);
	}
	return pblk;
}
/*
 * Translate block number vblk within the file described by metadata
 * list midx into a block number on the device.
 *
 * The file's index depth is taken from whichever of "dblindir",
 * "indirect", "index" or "dblock" is present.  If vblk needs more
 * levels than the file has, the index is promoted first.  When
 * allocate is set, missing blocks along the way are allocated.
 *
 * Returns the block number, or 0 if the file has no block information,
 * an allocation fails, or the result lies outside the data area.
 */
uvlong
locate(int fd, uvlong midx, uvlong vblk, int allocate)
{
	uvlong *p;
	uvlong iblk, pblk;
	ulong pperb;
	int levels, l1off, l2off, l3off;

	if(getmetaint(fd, midx, "dblindir", &iblk) == MTint)
		levels = 3;
	else if(getmetaint(fd, midx, "indirect", &iblk) == MTint)
		levels = 2;
	else if(getmetaint(fd, midx, "index", &iblk) == MTint)
		levels = 1;
	else if(getmetaint(fd, midx, "dblock", &iblk) == MTint)
		levels = 0;
	else
		return 0;
	/* split vblk into one slot number per index level */
	pperb = BlkSize / sizeof(uvlong);
	l1off = vblk % pperb;
	l2off = (vblk / pperb) % pperb;
	l3off = vblk / (pperb * pperb);
	if(levels < 3 && l3off != 0) {
		iblk = promote3(midx, iblk, levels);
		levels = 3;
	}
	else if(levels < 2 && l2off != 0) {
		iblk = promote2(midx, iblk, levels);
		levels = 2;
	}
	else if(levels < 1 && l1off > 0) {
		iblk = promote1(midx, iblk, levels);
		levels = 1;
	}
	pblk = 0;
	switch(levels) {
	case 3:
		iblk = doindir(fd, iblk, l3off, allocate);
		/* fall through */
	case 2:
		iblk = doindir(fd, iblk, l2off, allocate);
		/* fall through */
	case 1:
		if(iblk == 0)
			return 0;
		p = cbread(iblk);
		pblk = p[l1off];
		if(pblk == 0 && allocate) {
			pblk = allocblock();
			if(pblk == 0) {
				/* allocblock returns 0 on failure; never clean or link block 0 */
				brelease(iblk);
				return 0;
			}
			cbclean(pblk);
			p[l1off] = pblk;
			cbwrite(pblk);
			brelease(pblk);
			cbwrite(iblk);
		}
		brelease(iblk);
		break;
	case 0:
		pblk = iblk;
		if(pblk == 0 && allocate) {
			pblk = allocblock();
			if(pblk == 0)
				return 0;
			cbclean(pblk);
			setmetaint(midx, "dblock", nil, pblk);
			cbwrite(pblk);
			brelease(pblk);
		}
		break;
	}
	if(pblk < super.firstdat || pblk >= super.nblk) {
		fprint(2, "Bogus block number found in locate: index:%ulld\n", iblk);
		return 0;
	}
	return pblk;
}
/*
 * Free index block blk and everything below it.  lev is the number of
 * index levels beneath blk: at level 0 every non-zero slot is freed as
 * a block; at higher levels each slot that lies in the data area is
 * descended into.  blk is released and freed last.
 */
static void
freeidxtree(uvlong blk, int lev)
{
	uvlong *slots;
	int i;

	slots = cbread(blk);
	for(i = 0; i < BlkSize / sizeof(uvlong); ++i) {
		if(lev == 0) {
			if(slots[i] != 0)
				freeblock(slots[i]);
		}
		else if(slots[i] >= super.firstdat && slots[i] < super.nblk)
			freeidxtree(slots[i], lev - 1);
	}
	brelease(blk);
	freeblock(blk);
}

/*
 * Free the blocks reachable through the index of the file described by
 * metadata list midx.  The deepest index entry present ("dblindir",
 * then "indirect", then "index") selects the tree to free; a file with
 * none of them is left alone.  The metadata entries themselves are not
 * changed.
 */
void
freedata(uvlong midx)
{
	uvlong iblk;
	int lev;

	if(getmetaint(-1, midx, "dblindir", &iblk) == MTint)
		lev = 2;
	else if(getmetaint(-1, midx, "indirect", &iblk) == MTint)
		lev = 1;
	else if(getmetaint(-1, midx, "index", &iblk) == MTint)
		lev = 0;
	else
		return;
	if(iblk == 0)
		return;
	if(iblk < super.firstdat) {
		switch(lev) {
		case 2:
			fprint(2,"Bogus dblindir block in freedat: %ulld\n", iblk);
			break;
		case 1:
			fprint(2, "Bogus indirect block in freedat: %ulld\n", iblk);
			break;
		case 0:
			fprint(2, "Bogus index block in freedat: %ulld\n", iblk);
			break;
		}
		return;
	}
	freeidxtree(iblk, lev);
}
/*
 * Print, on fd, every entry of the metadata list belonging to qpath
 * (looked up with q2m).  Prints "no metadata" if there is no list.
 */
void
prmeta(int fd, uvlong qpath)
{
	GMeta buf;
	char *p;
	uvlong meta, next;
	int i, n;

	meta = q2m(-1, qpath, 0);
	if(meta == 0) {
		fprint(fd, "no metadata\n");
		return;
	}
	for(next = meta; next; ) {
		if(getmstruct(-1, &buf, next) < 0)
			break;
		switch(buf.type) {
		case MTnone:
			break;
		case MTint:
			fprint(fd, "%s: %ulld(%016ullx)\n", buf.name, buf.m.val, buf.m.val);
			break;
		case MTistring:
			fprint(fd, "%s: %s\n", buf.name, buf.m.str);
			break;
		case MTstring:
			p = getblob(-1, buf.m.val, nil);
			fprint(fd, "%s: %s\n", buf.name, p);
			free(p);
			break;
		case MTblob:
			fprint(fd, "%s:", buf.name);
			/* getblob leaves n untouched and returns nil when the blob is unusable */
			n = 0;
			p = getblob(-1, buf.m.val, &n);
			if(p != nil)
				for(i = 0; i < n; ++i)
					fprint(fd, " %02x", (uchar)p[i]);	/* avoid sign extension of bytes >= 0x80 */
			fprint(fd, "\n");
			free(p);
			break;
		}
		next = buf.next;
	}
}
/* Holds the text returned by prmstat(); overwritten on every call. */
static char mstatbuf[1024];

/*
 * Format the metadata event counters into mstatbuf and return it.
 */
char *
prmstat(void)
{
	char *s, *end;

	end = mstatbuf + nelem(mstatbuf);
	s = seprint(mstatbuf, end, "Metadata stats:\n");
	s = seprint(s, end, "getmeta calls: %ulld\n", nget);
	s = seprint(s, end, "setmeta calls: %ulld\n", nset);
	s = seprint(s, end, "rmmeta calls: %ulld\n", nrm);
	s = seprint(s, end, "alloc calls: %ulld\n", nalloc);
	s = seprint(s, end, "free calls: %ulld\n", nfree);
	seprint(s, end, "misses: %ulld\n", nmiss);
	return mstatbuf;
}
/*
 * Byte offset of hash bucket number bucket in the table that starts at
 * block super.nhashblk + 1; each bucket is one uvlong.
 */
static uvlong
qoffset(ulong bucket)
{
	return bucket * sizeof(uvlong) + BlkSize * (super.nhashblk + 1);
}
/*
 * Rebuild the metadata free list by mark and sweep, reporting progress
 * on fd:
 *   1. set the 0x80 bit in the type of every structure from index 1 up;
 *   2. walk every list reachable from the QID hash table and clear the
 *      bit on each structure visited;
 *   3. count the length of the old free list (report only);
 *   4. zero every structure still carrying the bit and chain it onto a
 *      new free list, then save the superblock.
 * NOTE(review): the return values of getmstruct, savemstruct and cread
 * are not checked anywhere in this function.
 */
void
recovermeta(int fd)
{
GMeta mb;
uvlong midx1, midx2, qhnext, qb, n;
int saidit;
/* First set all the marker flags */
fprint(fd, "Setting flags\n");
for(midx1 = 1; midx1 < super.nmeta * GMperBlk; ++midx1) {
getmstruct(-1, &mb, midx1);
mb.type |= 0x80;
savemstruct(&mb, midx1);
}
/* Go through all the q2m hash table and mark referenced ones in use */
fprint(fd, "Marking ones referenced from QID hash table\n");
for(qb = 0; qb < super.nht; ++qb) {
cread(&midx1, sizeof(uvlong), qoffset(qb));
qhnext = 0;
n = 0;
saidit = 0;
while(midx1 != 0) {
++n;
getmstruct(-1, &mb, midx1);
mb.type &= 0x7f;
/* saidit limits the complaint to once per bucket */
if(mb.type == MTnone && !saidit) {
fprint(fd, "Unexpected null metadatum at %ulld in bucket %ulld\n", midx1, qb);
saidit = 1;
}
midx2 = midx1;
midx1 = mb.next;
/* a non-zero "qhnext" entry names another list to walk once this one ends */
if(strcmp(mb.name, "qhnext") == 0 && mb.m.val != 0) {
qhnext = mb.m.val;
fprint(fd, "Warning, QID collision qb:%ulld midx:%ulld\n", qb, midx2);
}
savemstruct(&mb, midx2);
if(midx1 == 0) {
midx1 = qhnext;
qhnext = 0;
}
}
if(n > 128)
fprint(fd, "Unexpected large list at qb %ulld, size %ulld\n", qb, n);
}
fprint(fd, "Sizing old free list\n");
n = 0;
for(midx1 = super.ffmeta; midx1 != 0; ) {
++n;
/* a list as long as the whole pool can only be a cycle */
if(n >= super.nmeta * GMperBlk) {
fprint(fd, "Cycle in old free list?\n");
break;
}
getmstruct(-1, &mb, midx1);
midx1 = mb.next;
}
fprint(fd, "Old free list has %ulld structures\n", n);
/* Reclaim the free ones */
fprint(fd, "Rebuilding free list\n");
n = 0;
super.ffmeta = 0;
/* scanning from the top down and pushing on the front leaves the list in ascending index order */
for(midx1 = super.nmeta * GMperBlk - 1; midx1 != 0; --midx1) {
getmstruct(-1, &mb, midx1);
if(mb.type & 0x80) {
memset(&mb, 0, sizeof(GMeta));
mb.next = super.ffmeta;
super.ffmeta = midx1;
savemstruct(&mb, midx1);
++n;
}
}
savesuper();
fprint(fd, "Recovered %ulld free metadata structures\n", n);
}
/*
 * Clear the bit for block blk in the bitmap shadow.  Returns -1 if blk
 * is outside the data area; otherwise returns the previous state of the
 * bit (0 if it was already clear, non-zero if it was set).
 */
static int
markinuse(char *shadow, uvlong blk)
{
	long byt;
	int mask, was;

	if(blk < super.firstdat || blk >= super.nblk)
		return -1;
	byt = blk / 8;
	mask = 1 << (blk % 8);
	was = shadow[byt] & mask;
	shadow[byt] &= ~mask;
	return was;
}
/* Dealing with humans always makes the code ugly. */
/*
 * Names used in diagnostics by chkidxalloc and idxuse: for an index
 * block at level lev, idxnames[lev+1] names the block itself and
 * idxnames[lev] names what its slots point to.
 */
static char *idxnames[] = {"data", "index", "indirect", "dblindir"};
/*
 * Mark index block blk, and everything reachable from it, as in use in
 * the bitmap shadow, reporting on fd any block that is out of range or
 * was already marked.  lev is the number of index levels below blk
 * (0 means its slots point at data blocks); midx is only used in the
 * messages.
 */
static void
chkidxalloc(int fd, char *shadow, uvlong blk, uvlong midx, int lev)
{
uvlong *iblk;
int i;
if(blk == 0)
return;
/* markinuse: -1 = out of range, 0 = bit was already clear, anything else = newly marked */
switch(markinuse(shadow, blk)) {
case -1:
fprint(fd, "Bogus %s block: %ulld metadataum %ulld\n", idxnames[lev+1], blk, midx);
return;
case 0:
fprint(fd, "Doubly allocated %s block: %ulld meta %ulld\n", idxnames[lev+1], blk, midx);
break;
}
iblk = cbread(blk);
if(iblk == nil) {
fprint(fd, "unexpected error reading block %ulld\n", blk);
return;
}
for(i = 0; i < BlkSize / sizeof(uvlong); ++i) {
if(iblk[i] == 0)
continue;
if(lev > 0)
chkidxalloc(fd, shadow, iblk[i], midx, lev - 1);
else {
switch(markinuse(shadow, iblk[i])) {
case -1:
fprint(fd, "Bogus %s block: %ulld in %s block %ulld meta %ulld\n",
idxnames[lev], iblk[i], idxnames[lev+1], blk, midx);
/* push i to the limit so the loop stops after the first bogus slot */
i = BlkSize / sizeof(uvlong);
break;
case 0:
fprint(fd, "Doubly allocated %s block: %ulld in %s block %ulld meta %ulld\n",
idxnames[lev], iblk[i], idxnames[lev+1], blk, midx);
break;
}
}
}
brelease(blk);
}
/*
 * Cross-check the on-disk free bitmap against the blocks actually
 * referenced, reporting on fd.
 *
 * A shadow bitmap is built with a bit set for every block in the data
 * area; bits are then cleared (markinuse) for every block found on a
 * hash-bucket chain and for every block reachable from an "index",
 * "indirect" or "dblindir" metadata entry.  Finally the shadow is
 * compared byte by byte with the on-disk map; wherever the on-disk map
 * has a bit set that the shadow has clear, that bit is cleared on disk.
 */
void
checkalloc(int fd)
{
GMeta mb;
uvlong *hblk;
char *shadow, *fb;
uvlong idx;
long i;
int j, k, l;
fprint(fd, "Initializing shadow free map\n");
/* the |= below relies on θmalloc returning zeroed memory -- TODO confirm */
shadow = θmalloc(super.nfreemap * BlkSize);
for(i = super.firstdat; i < super.nblk; ++i)
shadow[i/8] |= 1 << (i % 8);
/* Hash table blocks are 1 .. super.nhashblk; i counts buckets across all of them. */
for(i = 0, j = 0; j < super.nhashblk; ++j) {
hblk = cbread(j + 1);
for(k = 0; k < BlkSize / sizeof(uvlong) && i < super.nht; ++k, ++i) {
if(i % 100000 == 0)
fprint(fd, ".");
idx = hblk[k];
while(idx != 0) {
switch(markinuse(shadow, idx)) {
case -1:
fprint(fd, "Bogus block number in hash list for bucket %ld\n", i);
idx = 0;
break;
case 0:
fprint(fd, "Doubly allocated block in hash table: %ulld bucket %ld\n", idx, i);
/* deliberately falls through: the chain is still followed after the complaint */
default:
/* the link to the next block of the chain is the last uvlong of the block */
if(cread(&idx, sizeof(uvlong), idx * BlkSize + (BlkSize - sizeof(uvlong))) < 0) {
fprint(fd, "Error reading bucket next link: %ld %ulld\n", i, idx);
idx = 0;
}
if(idx != 0)
fprint(fd, ",");
break;
}
}
}
brelease(j + 1);
}
fprint(fd, "Scanning metadata\n");
/* NOTE(review): the return value of getmstruct is not checked here. */
for(idx = 1; idx < super.nmeta * GMperBlk; ++idx) {
if(idx % 100000 == 0)
fprint(fd, ".");
getmstruct(-1, &mb, idx);
if(mb.type != MTint)
continue;
if(strcmp(mb.name, "index") == 0)
chkidxalloc(fd, shadow, mb.m.val, idx, 0);
else if(strcmp(mb.name, "indirect") == 0)
chkidxalloc(fd, shadow, mb.m.val, idx, 1);
else if(strcmp(mb.name, "dblindir") == 0)
chkidxalloc(fd, shadow, mb.m.val, idx, 2);
}
fprint(fd, "Comparing to on-disk free map\n");
l = 0;
for(j = 0; j < super.nfreemap; ++j) {
fb = cbread(super.freemap + j);
for(k = 0; k < BlkSize; ++k) {
if(fb[k] != shadow[j*BlkSize+k]) {
if(++l < 10) /* don't flood the output with too many */
fprint(fd, "%d:%02ux-%02ux\n", j*BlkSize+k, fb[k], (uchar)shadow[j*BlkSize+k]);
/* only repair in one direction: bits set on disk but clear in the shadow */
if((fb[k] & shadow[j*BlkSize+k]) != fb[k]) {
fprint(fd, "Marking in use\n");
fb[k] &= shadow[j*BlkSize + k];
cbwrite(super.freemap + j);
}
}
}
brelease(super.freemap + j);
}
free(shadow);
}
/*
 * Print a one-line description of metadata structure idx on fd,
 * formatted according to its type.
 */
void
mprint(int fd, uvlong idx)
{
	GMeta mb;
	char *p;
	int i, n;

	if(getmstruct(-1, &mb, idx) < 0) {
		/* mb would be uninitialized; say so instead of printing garbage */
		fprint(fd, "Meta:%ulld unreadable\n", idx);
		return;
	}
	switch(mb.type) {
	case MTnone:
		fprint(fd, "Meta:%ulld name:%s type:none next:%ulld val:%ulld\n", idx, mb.name, mb.next, mb.m.val);
		break;
	case MTint:
		fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:%ulld(%016ullx)\n", idx, mb.name, mb.type, mb.next, mb.m.val, mb.m.val);
		break;
	case MTistring:
		fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:%s\n", idx, mb.name, mb.type, mb.next, mb.m.str);
		break;
	case MTstring:
		p = getblob(-1, mb.m.val, nil);
		fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld val:(%ulld)%s\n", idx, mb.name, mb.type, mb.next, mb.m.val, p);
		free(p);
		break;
	case MTblob:
		fprint(fd, "Meta:%ulld name:%s type:%d next:%ulld ", idx, mb.name, mb.type, mb.next);
		/* getblob leaves n untouched and returns nil when the blob is unusable */
		n = 0;
		p = getblob(-1, mb.m.val, &n);
		if(p != nil)
			for(i = 0; i < n; ++i)
				fprint(fd, " %02x", (uchar)p[i]);	/* avoid sign extension of bytes >= 0x80 */
		fprint(fd, "\n");
		free(p);
		break;
	default:
		fprint(fd, "unknown Meta:%ulld type%d next%ulld\n", idx, mb.type, mb.next);
		break;
	}
}
/*
 * Scan the whole metadata pool for the first structure in use (type
 * other than MTnone) whose next link is idx, and print it on fd.
 * Prints nothing if there is none.
 */
void
mpred(int fd, uvlong idx)
{
	GMeta cand;
	uvlong at;

	for(at = 1; at < super.nmeta * GMperBlk; ++at) {
		getmstruct(-1, &cand, at);
		if(cand.type == MTnone || cand.next != idx)
			continue;
		fprint(fd, "Meta:%ulld predecessor:%ulld\n", idx, at);
		mprint(fd, at);
		return;
	}
}
/*
 * Report on fd every place block blk appears in the index tree rooted
 * at iblk: as the root itself, or in a slot of any index block.  lev is
 * the number of index levels below iblk; midx is only used in the
 * messages.
 */
static void
idxuse(int fd, uvlong iblk, uvlong blk, uvlong midx, int lev)
{
	uvlong *slots;
	int s, nslots;

	if(iblk == 0)
		return;
	if(iblk == blk)
		fprint(fd, "%s entry meta: %ulld\n", idxnames[lev+1], midx);
	slots = cbread(iblk);
	if(slots == nil) {
		fprint(fd, "error reading block %ulld\n", iblk);
		return;
	}
	nslots = BlkSize / sizeof(uvlong);
	for(s = 0; s < nslots; ++s) {
		if(slots[s] == blk)
			fprint(fd, "%s block in %s block %ulld in meta %ulld\n", idxnames[lev], idxnames[lev+1], iblk, midx);
		if(lev > 0)
			idxuse(fd, slots[s], blk, midx, lev - 1);
	}
	brelease(iblk);
}
/*
 * Report on fd what block blk is used for.  Blocks in the fixed regions
 * (superblock, the two hash tables, free bitmap, metadata pool,
 * string/blob pool) are identified by range; for anything else every
 * "index", "indirect" and "dblindir" metadata entry is searched with
 * idxuse.  The P2Q hash-chain search under NOTDEF is compiled out.
 */
void
blockuse(int fd, uvlong blk)
{
GMeta mb;
// PQMap *pq, *pend;
// char *p;
uvlong /* hlist, */ midx;
// long i;
if(blk == 0) {
fprint(fd, "superblock\n");
return;
}
if(blk < super.nhashblk + 1) {
fprint(fd, "P2Q hash table\n");
return;
}
if(blk < 2 * super.nhashblk + 1) {
fprint(fd, "Q2M hash table\n");
return;
}
if(blk >= super.freemap && blk < super.freemap + super.nfreemap) {
fprint(fd, "free bitmap\n");
return;
}
if(blk >= super.firstmeta && blk < super.firstmeta + super.nmeta) {
fprint(fd, "metadata structure pool\n");
return;
}
if(blk >= super.firstblob && blk < super.firstblob + super.nblob) {
fprint(fd, "string/blob pool\n");
return;
}
#ifdef NOTDEF
for(i = 0; i < super.nht; ++i) {
if(i % 100000 == 0)
fprint(fd, ".");
if(cread(&hlist, sizeof(uvlong), BlkSize + i * sizeof(uvlong)) < 0) {
fprint(fd, "Error reading bucket %ld\n", i);
continue;
}
if(hlist == blk)
fprint(fd, "P2Q hash bucket %ld\n", i);
while(hlist != 0) {
p = cbread(hlist);
if(p == nil) {
fprint(fd, "Error reading hash list block %ulld\n", hlist);
break;
}
pend = (PQMap *)(p + BlkSize);
--pend;
// for(pq = (PQMap *)p; pq < pend && pq->qpath != 0; pq = nextpq(pq)) {
// }
}
}
#endif
/* NOTE(review): the return value of getmstruct is not checked in this scan. */
for(midx = 1; midx < super.nmeta * GMperBlk; ++midx) {
getmstruct(-1, &mb, midx);
if(mb.type != MTint)
continue;
if(mb.m.val == 0)
continue;
if(strcmp(mb.name, "index") == 0)
idxuse(fd, mb.m.val, blk, midx, 0);
else if(strcmp(mb.name, "indirect") == 0)
idxuse(fd, mb.m.val, blk, midx, 1);
else if(strcmp(mb.name, "dblindir") == 0)
idxuse(fd, mb.m.val, blk, midx, 2);
}
}
/*
 * Scan the whole metadata pool for integer "child" and "sib" entries
 * whose value no longer resolves through q2m, and reset each such value
 * to 0, reporting every change on fd.
 */
void
fixfamilies(int fd)
{
	GMeta ent;
	uvlong at;

	for(at = 1; at < super.nmeta * GMperBlk; ++at) {
		if(getmstruct(-1, &ent, at) < 0 || ent.type != MTint)
			continue;
		if(strcmp(ent.name, "child") != 0 && strcmp(ent.name, "sib") != 0)
			continue;
		/* leave empty links and links that still resolve alone */
		if(ent.m.val == 0 || q2m(-1, ent.m.val, 0) != 0)
			continue;
		fprint(fd, "clearing dangling %s:%ulld in meta struct %ulld\n", ent.name, ent.m.val, at);
		ent.m.val = 0;
		savemstruct(&ent, at);
	}
}
|