# Copyright M Heath 2003
# Proof-of-concept code for tabfs.
# Takes a tab-delimited file and presents it as a file system, with the first row as column names and the first column as the primary key.
# It will break if these criteria are not met.
# Hope you've got plenty of memory; you're probably going to need it.
implement tabfs1;
include "sys.m";
include "draw.m";
include "styx.m";
include "styxservers.m";
include "bufio.m";
include "string.m";
include "arg.m";
# Module handles. All of them are nil until init loads them.
arg: Arg;
# sys
sys: Sys;
str : String;
# styx stuff
styx : Styx;
bufio : Bufio;
Rmsg : import styx;
styxservers: Styxservers;
Styxserver, Navigator: import styxservers;
nametree: Nametree;
Tree: import nametree;
# the name tree served to clients; assigned in init from nametree->start()
tree : ref Tree;
Iobuf: import bufio;
# module definition
tabfs1 : module
{
init: fn(nil: ref Draw->Context, argv: list of string);
};
# my adts
# src_file: what is kept per loaded source file. Both members are held as
# bytes because they are handed straight to styxservers->readbytes.
src_file : adt {
filename : array of byte;
column_names : array of byte;
};
# other globals
# Qid paths are built as (index << bitshift) + one of the Q constants below,
# so the low 32 bits carry the kind of file and the bits above carry an index
# into src_files or columns (or a row number for Qrow).
bitshift : con 32;
# iota numbers these 0, 1, 2, ... in the order written.
# Qcolumn_names, Qfilename and Qcolumn are served by the Read handler in init;
# Qnew and Qctl accept writes. Q is not referenced in the code visible here.
Qroot, Qnew, Qmeta_folder, Qdata_folder, Qcolumn_names, Qfilename, Qctl, Qcolumn, Qrow, Qnumber_of_rows, Q: con big iota; # paths
# src_files is grown by extend_files; file_count is the index of the next free
# entry (and, while a file is being loaded, the index of that file).
src_files : array of src_file ;
file_count := 0;
# columns holds the contents of every cell added by add_column;
# column_count is the index of the next free entry. Grown by extend_columns.
columns : array of array of byte;
column_count := 0;
# init: entry point. Parses the command line, loads the modules the server
# needs, builds the initial name tree and then answers Styx requests arriving
# on file descriptor 0 until the request channel yields nil.
#
# Options:
#	-p n	index of the column whose value names each row directory (default 0)
#	-c	take column names from the first row
#	-f file	tab separated file to load at startup
#
# Starting without -f is allowed: a source file can be added later by writing
# its name to the "new" file.
init(nil: ref Draw->Context, args : list of string)
{
	sys = load Sys Sys->PATH;
	arg = load Arg Arg->PATH;
	# String has to be loaded before the option loop: -p calls str->toint.
	str = load String String->PATH;

	starting_filename : string;
	# kludge some defaults in
	primary_key_column := 0;
	column_names_in_first_row := 1;

	arg->init(args);
	while((c := arg->opt()) != 0)
		case c {
		'p' =>
			(primary_key_column, nil) = str->toint(arg->arg(), 10);
		'c' =>
			# NOTE(review): the default is already 1, so this option changes
			# nothing and there is no way to switch header parsing off.
			column_names_in_first_row = 1;
		'f' =>
			starting_filename = arg->arg();
		* =>
			sys->print("unknown option (%c)\n", c);
		}

	styx = load Styx Styx->PATH;
	bufio = load Bufio Bufio->PATH;
	styx->init();
	styxservers = load Styxservers Styxservers->PATH;
	styxservers->init(styx);
	nametree = load Nametree Nametree->PATH;
	nametree->init();
	sys->pctl(Sys->FORKNS, nil);

	treeop : chan of ref Styxservers->Navop;
	(tree, treeop) = nametree->start();
	tree.create(Qroot, dir(".", 8r555|Sys->DMDIR, Qroot));
	tree.create(Qroot, dir("new", 8r666, Qnew));
	if(starting_filename != nil)
		add_src_file(array of byte starting_filename, column_names_in_first_row, primary_key_column);

	(tchan, srv) := Styxserver.new(sys->fildes(0), Navigator.new(treeop), Qroot);

	# Each handler below sets reply only for the paths it knows about.
	# Anything left with reply == nil (unknown fid, other file kinds, other
	# message types) is passed to srv.default.
	reply : ref Rmsg;
	while((gm := <-tchan) != nil) {
		pick m := gm {
		Read => {
			fid := srv.getfid(m.fid);
			if(fid != nil) {
				# high bits: index into src_files or columns; low bits: file kind
				i := int(fid.path >> bitshift);
				case big int(fid.path) {
				Qcolumn_names =>
					reply = styxservers->readbytes(m, src_files[i].column_names);
				Qfilename =>
					reply = styxservers->readbytes(m, src_files[i].filename);
				Qcolumn =>
					reply = styxservers->readbytes(m, columns[i]);
				}
			}
		}
		Write => {
			fid := srv.getfid(m.fid);
			if(fid != nil) {
				case big int(fid.path) {
				Qnew =>
					# the written bytes are the name of a file to load
					add_src_file(m.data, column_names_in_first_row, primary_key_column);
					reply = ref Rmsg.Write(m.tag, len m.data);
				Qctl =>
					# doesn't do anything yet
					reply = ref Rmsg.Write(m.tag, len m.data);
				}
			}
		}
		Remove => {
			fid := srv.getfid(m.fid);
			if(fid != nil) {
				case big int(fid.path) {
				Qcolumn =>
					i := int(fid.path >> bitshift);
					columns[i] = nil;
					# drop the entry from the tree as well, otherwise the
					# removed file would still be listed in its row directory
					tree.remove(fid.path);
					srv.delfid(fid);
					reply = ref Rmsg.Remove(m.tag);
				}
			}
		}
		}
		if(reply == nil) {
			srv.default(gm);
		} else {
			srv.reply(reply);
			reply = nil;
		}
	}
	tree.quit();
}
# dir: build the Sys->Dir for one tree entry.
#	name	entry name
#	perm	mode bits; Sys->DMDIR set means the entry is a directory
#	qid	qid path for the entry
# Owner and group are always "inferno"; every other field keeps the value it
# has in sys->zerodir.
dir(name: string, perm: int, qid: big): Sys->Dir {
	# the qid type follows the directory bit of the mode
	kind := Sys->QTFILE;
	if ((perm & Sys->DMDIR) != 0)
		kind = Sys->QTDIR;

	entry := sys->zerodir;
	entry.name = name;
	entry.uid = "inferno";
	entry.gid = "inferno";
	entry.mode = perm;
	entry.qid.path = qid;
	entry.qid.qtype = kind;
	return entry;
}
# extend_files: replace src_files with a larger array holding the same
# entries. The new length is the old length plus 5 plus a quarter of the old
# length (formula plucked from thin air).
extend_files() {
	old_len := len src_files;
	grown := array[old_len + 5 + old_len / 4] of src_file;
	for (k := 0; k < old_len; k++)
		grown[k] = src_files[k];
	src_files = grown;
}
# add_src_file: register a new source file and load its rows into the tree.
#	name	path of the tab separated file, as bytes
#	use_first_row_as_column_names	non-zero to read the header from row one
#	primary_key_column	index of the column that names each row directory
#
# Trailing newlines and carriage returns are stripped from name, because the
# name can arrive as the raw payload of a write to "new" (echo appends a
# newline, and the file could then never be opened). A name that is empty
# after stripping is ignored.
add_src_file(name : array of byte, use_first_row_as_column_names, primary_key_column : int) {
	n := len name;
	while (n > 0 && (name[n-1] == byte '\n' || name[n-1] == byte '\r'))
		n--;
	if (n == 0)
		return;
	name = name[0:n];

	if (file_count == len src_files)
		extend_files();
	src_files[file_count] = src_file(name, nil);
	# both helpers work on src_files[file_count], so the counter is only
	# advanced once they are done
	add_meta_folder();
	read_rows(use_first_row_as_column_names, primary_key_column);
	file_count++;
}
# add_meta_folder: create the directory for the file currently being added
# (src_files[file_count]). It is named after the file's index and placed
# under the root; inside it go the "data" directory and the files
# src_filename, column_names, number_of_rows and ctl.
# Every qid path is (file_count << bitshift) plus the matching Q constant.
add_meta_folder() {
	base := (big file_count) << bitshift;
	meta := base + Qmeta_folder;

	tree.create(Qroot, dir(sys->sprint("%d", file_count), 8r755 | Sys->DMDIR, meta));
	tree.create(meta, dir("data", 8r555 | Sys->DMDIR, base + Qdata_folder));

	# plain files of the folder: (name, mode, kind), in creation order
	leaves := array[] of {
		("src_filename", 8r444, Qfilename),
		("column_names", 8r444, Qcolumn_names),
		("number_of_rows", 8r444, Qnumber_of_rows),
		("ctl", 8r644, Qctl),
	};
	for (k := 0; k < len leaves; k++) {
		(leaf_name, leaf_mode, leaf_kind) := leaves[k];
		tree.create(meta, dir(leaf_name, leaf_mode, base + leaf_kind));
	}
}
# set_column_names_from_row: store the part of data before its first newline
# as the column names of the file currently being added.
# Returns 1 when the names were stored and 0 when data is empty; no other
# value is returned.
set_column_names_from_row(data : string) : int {
	if(data != "") {
		(header, nil) := str->splitl(data, "\n");
		src_files[file_count].column_names = array of byte header;
		return 1;
	}
	return 0;
}
# set_column_names_to_numbers: name the columns "0", "1", "2", ... with one
# name per tab separated field found on the first line of data.
# Returns 0 when data is empty, otherwise whatever
# set_column_names_from_row returns for the generated header.
set_column_names_to_numbers(data : string) : int {
	if(data == "") return 0;
	(first_line, nil) := str->splitl(data, "\n");
	width := len split_on_tabs(first_line);
	# the header always starts with "0", even when no field was counted
	names := "0";
	k := 1;
	while(k < width) {
		names = names + "\t" + string k;
		k++;
	}
	return set_column_names_from_row(names);
}
# set_column_names_and_get_first_row: read the first line of data_buf, set up
# the column names and return the first line that holds data.
# With column_names_in_first_row set, the first line becomes the header and
# the following line is returned; otherwise the columns are numbered and the
# first line itself is returned.
set_column_names_and_get_first_row(data_buf : ref Bufio->Iobuf, column_names_in_first_row : int) : string {
	first := data_buf.gets('\n');
	if(!column_names_in_first_row) {
		set_column_names_to_numbers(first);
		return first;
	}
	set_column_names_from_row(first);
	return data_buf.gets('\n');
}
# get_column_names_as_array: the column names of the file currently being
# added, split on tabs, one array element per name in file order.
get_column_names_as_array() : array of string {
	header := string src_files[file_count].column_names;
	reversed := split_on_tabs(header);
	return list_to_array(reversed);
}
# list_to_array: copy a list into an array in reverse order: the head of the
# list lands in the last slot. This undoes the reversed order in which
# split_on_tabs builds its result.
list_to_array(lizt : list of string) : array of string {
	slot := len lizt;
	out := array[slot] of string;
	while(lizt != nil) {
		slot--;
		out[slot] = hd lizt;
		lizt = tl lizt;
	}
	return out;
}
# read_rows: load every row of the file currently being added
# (src_files[file_count]) into its "data" directory. Each row becomes a
# directory holding one file per column.
#	column_names_in_first_row	non-zero: the first line is the header
#	primary_key_column	index of the column whose value names the row
#		directory; a negative or out-of-range index means rows are named
#		after their row number instead
# Returns the number of rows added, or 0 when the file cannot be opened.
#
# Lines come from gets('\n') with the newline still attached, so the last
# field of a row keeps its trailing newline.
#
# NOTE(review): row qids are built from the row number alone, without the
# file index, so rows of a second source file reuse the qids of the first —
# confirm and give rows a file-wide unique index.
read_rows(column_names_in_first_row, primary_key_column : int) : int { # return number of rows
	path := string src_files[file_count].filename;
	data_buf := bufio->open(path, Bufio->OREAD);
	if (data_buf == nil) {
		# without this check the first gets would fault on a nil Iobuf
		sys->fprint(sys->fildes(2), "tabfs: cannot open %s: %r\n", path);
		return 0;
	}

	bfc := big file_count;
	bfc = bfc << bitshift;
	d_qid := bfc + Qdata_folder;

	row := set_column_names_and_get_first_row(data_buf, column_names_in_first_row);
	column_name_lookup := get_column_names_as_array();
	# valid column indexes are 0 .. len-1, so len itself is out of range too
	if (primary_key_column >= len column_name_lookup)
		primary_key_column = -1;

	fields : array of string;
	row_folder : big;
	pk_value : string;
	num_rows := 0;
	while(row != nil) {
		fields = list_to_array(split_on_tabs(row));
		if(primary_key_column < 0)
			pk_value = sys->sprint("%d", num_rows);
		else if(primary_key_column < len fields)
			pk_value = fields[primary_key_column];
		else if(row == "\n")
			pk_value = "\n";	# blank line: skipped below, as it is when the key is column 0
		else
			pk_value = sys->sprint("%d", num_rows);	# row too short to hold the key column
		if(pk_value == "\n") {
			row = data_buf.gets('\n');
			continue;
		}
		bfl := big num_rows;
		bfl = bfl << bitshift;
		row_folder = bfl + Qrow;
		tree.create(d_qid, dir(pk_value, 8r555|Sys->DMDIR, row_folder));
		# being generous: read at most as many fields as the first row has columns
		min_fields := min(len fields, len column_name_lookup);
		for(j := 0; j < min_fields; j++)
			add_column(row_folder, column_name_lookup[j], fields[j]);
		num_rows++;
		row = data_buf.gets('\n');
	}
	return num_rows;
}
# add_column: store one cell and publish it as a read-only file.
#	parent	qid path of the row directory that receives the file
#	name	file name (the column name)
#	data	cell contents, kept as bytes in columns
# The file's qid path carries the cell's index in columns above bitshift.
add_column(parent : big, name : string, data : string) {
	if (column_count == len columns)
		extend_columns();
	slot := column_count;
	columns[slot] = array of byte data;
	column_count = slot + 1;
	tree.create(parent, dir(name, 8r444, ((big slot) << bitshift) + Qcolumn));
}
# extend_columns: replace columns with a larger array holding the same
# entries. The new length is the old length plus 5 plus a quarter of the old
# length (formula plucked from thin air).
extend_columns() {
	old_len := len columns;
	grown := array[old_len + 5 + old_len / 4] of array of byte;
	for (k := 0; k < old_len; k++)
		grown[k] = columns[k];
	columns = grown;
}
# min: the smaller of two ints.
min(a, b : int) : int {
	if(b <= a)
		return b;
	return a;
}
# split_on_tabs: cut row at every tab character.
# The result is built by prepending, so it comes back in reverse order: the
# last field is at the head of the list. An empty row gives an empty list; a
# row ending in a tab gives an empty string as its last field.
split_on_tabs(row : string) : list of string {
	fields : list of string;
	n := len row;
	start := 0;	# index where the field being scanned begins
	for(pos := 0; pos < n; pos++)
		if (row[pos] == '\t') {
			fields = row[start:pos] :: fields;
			start = pos + 1;
		}
	# whatever follows the last tab is a field too, unless row is empty
	if(n > 0)
		fields = row[start:n] :: fields;
	return fields;
}
|