#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include <mach.h>
/*
* file - determine type of file
*/
/*
 * LENDIAN assembles the four bytes at p into a 32-bit little-endian value.
 * Each byte is widened to ulong before shifting so that a top byte >= 0x80
 * is never shifted into the sign bit of a promoted int.
 */
#define LENDIAN(p) ((ulong)(p)[0] | ((ulong)(p)[1]<<8) | ((ulong)(p)[2]<<16) | ((ulong)(p)[3]<<24))

uchar buf[6001];	/* leading bytes of the current file; filetype() NUL-terminates them */
short cfreq[140];	/* histogram: ASCII bytes index directly, other classes at Clatin.. */
short wfreq[50];	/* counts of dictionary words, indexed by dict[].class */
int nbuf;		/* number of bytes read into buf */
Dir* mbuf;		/* dirfstat() result for the current file */
int fd;			/* descriptor opened by type() */
char *fname;		/* name of the current file as passed to type() */
char *slash;		/* rightmost '/' in fname, or 0 */
/*
 * Class codes.
 * Cword..I3 are word classes that index wfreq[]; Clatin..Cutf are
 * character classes that index cfreq[] above the ASCII range.
 */
enum
{
	Cword,		/* C keyword */
	Fword,		/* presumably Fortran keyword; no classifier here reads it */
	Aword,		/* assembler word, counted by isas() */
	Alword,		/* alef word, counted by isc() */
	Lword,		/* limbo word, counted by islimbo() */
	I1,		/* "include" */
	I2,		/* base name of a well-known header */
	I3,		/* "h" header suffix */
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Dictionary of words that hint at a source language.
 * wordfreq() looks words up with a binary search using strcmp(),
 * so the entries MUST stay sorted in strcmp (byte) order:
 * upper case sorts before lower case.
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	{ "PATH",	Lword },
	{ "TEXT",	Aword },
	{ "adt",	Alword },
	{ "aggr",	Alword },
	{ "alef",	Alword },
	{ "array",	Lword },
	{ "bio",	I2 },
	{ "block",	Fword },
	{ "chan",	Alword },
	{ "char",	Cword },
	{ "common",	Fword },
	{ "con",	Lword },
	{ "data",	Fword },
	{ "dimension",	Fword },
	{ "double",	Cword },
	{ "extern",	Cword },
	{ "float",	Cword },
	{ "fn",		Lword },
	{ "function",	Fword },
	{ "h",		I3 },
	{ "implement",	Lword },
	{ "import",	Lword },
	{ "include",	I1 },
	{ "int",	Cword },
	{ "integer",	Fword },
	{ "iota",	Lword },
	{ "libc",	I2 },
	{ "long",	Cword },
	{ "module",	Lword },
	{ "real",	Fword },
	{ "ref",	Lword },
	{ "register",	Cword },
	{ "self",	Lword },
	{ "short",	Cword },
	{ "static",	Cword },
	{ "stdio",	I2 },
	{ "struct",	Cword },
	{ "subroutine",	Fword },
	{ "u",		I2 },
	{ "void",	Cword },
};
/* codes for the mode field of the language table */
enum {
	Normal	= 0,
	First,		/* first entry for a language spanning several ranges */
	Multi,		/* later entries for the same language */
	Shared,		/* codes used in several languages */
};

/*
 * Unicode ranges used to name the scripts found in UTF text.
 * bump_utf_count() binary-searches this table, so entries must be
 * sorted by code point and must not overlap.  The ranges follow the
 * current Unicode block layout: Tibetan is U+0F00-U+0FFF and the
 * Hangul syllables are U+AC00-U+D7A3 (the Unicode 1.x positions
 * 0x1000-0x105F and 0x3400-0x3D2F now belong to other scripts).
 */
struct
{
	int	mode;		/* see enum above */
	int	count;		/* runes seen in this range; reset per file */
	int	low;		/* first code point of the range */
	int	high;		/* last code point of the range */
	char	*name;
} language[] =
{
	{ Normal,	0,	0x00a0,	0x00ff,	"Latin" },
	{ Normal,	0,	0x0100,	0x01FF,	"Extended Latin" },
	{ Normal,	0,	0x0370,	0x03FF,	"Greek" },
	{ Normal,	0,	0x0400,	0x04FF,	"Cyrillic" },
	{ Normal,	0,	0x0530,	0x058F,	"Armenian" },
	{ Normal,	0,	0x0590,	0x05FF,	"Hebrew" },
	{ Normal,	0,	0x0600,	0x06FF,	"Arabic" },
	{ Normal,	0,	0x0900,	0x097F,	"Devanagari" },
	{ Normal,	0,	0x0980,	0x09FF,	"Bengali" },
	{ Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi" },
	{ Normal,	0,	0x0A80,	0x0AFF,	"Gujarati" },
	{ Normal,	0,	0x0B00,	0x0B7F,	"Oriya" },
	{ Normal,	0,	0x0B80,	0x0BFF,	"Tamil" },
	{ Normal,	0,	0x0C00,	0x0C7F,	"Telugu" },
	{ Normal,	0,	0x0C80,	0x0CFF,	"Kannada" },
	{ Normal,	0,	0x0D00,	0x0D7F,	"Malayalam" },
	{ Normal,	0,	0x0E00,	0x0E7F,	"Thai" },
	{ Normal,	0,	0x0E80,	0x0EFF,	"Lao" },
	{ Normal,	0,	0x0F00,	0x0FFF,	"Tibetan" },
	{ Normal,	0,	0x10A0,	0x10FF,	"Georgian" },
	{ Normal,	0,	0x3040,	0x30FF,	"Japanese" },
	{ Normal,	0,	0x3100,	0x312F,	"Chinese" },
	{ First,	0,	0x3130,	0x318F,	"Korean" },
	{ Shared,	0,	0x4e00,	0x9fff,	"CJK" },
	{ Multi,	0,	0xAC00,	0xD7A3,	"Korean" },
	{ Normal,	0,	0,	0,	0 },	/* terminal entry */
};
/*
 * Gross classification of the current file.
 * filetype() sets guess from the character histogram before the
 * individual classify routines run; isenglish() consults it.
 */
enum
{
Fascii, /* printable ascii */
Flatin, /* latin 1 */
Futf, /* UTF character set */
Fbinary, /* binary */
Feascii, /* ASCII with control chars */
Fnull, /* NULL in file */
} guess;
/*
 * Forward declarations.
 * The int (void) routines are classifiers: each returns non-zero
 * after printing a description when it recognizes the file.
 */
void bump_utf_count(Rune);
int cistrncmp(char*, char*, int);
void filetype(int);
int getfontnum(uchar*, uchar**);
int isas(void);
int isc(void);
int iscint(void);
int isenglish(void);
int ishp(void);
int ishtml(void);
int isrfc822(void);
int ismbox(void);
int islimbo(void);
int ismung(void);
int isp9bit(void);
int isp9font(void);
int isrtf(void);
int ismsdos(void);
int iself(void);
int isface(void);
int istring(void);
int iff(void);
int long0(void);
int istar(void);
int p9bitnum(uchar*);
int p9subfont(uchar*);
void print_utf(void);
void type(char*, int);
int utf_count(void);
void wordfreq(void);
/*
 * Classifiers in the order filetype() tries them; the first one that
 * returns non-zero wins.  The list is terminated by a nil entry.
 */
// iself is called before long0 because crackhdr will pick up some elf
// executables, but not others.
int (*call[])(void) =
{
iself, /* recognizable by first 20 bytes: ELF (foreign) executable */
long0, /* recognizable by first 4 bytes */
istring, /* recognizable by first string */
iff, /* interchange file format (strings) */
isrfc822, /* email file */
ismbox, /* mail box */
istar, /* recognizable by tar checksum */
ishtml, /* html keywords */
iscint, /* compiler/assembler intermediate */
islimbo, /* limbo source */
isc, /* c & alef compiler key words */
isas, /* assembler key words */
ismung, /* entropy compressed/encrypted */
isp9font, /* plan 9 font */
isp9bit, /* plan 9 image (as from /dev/window) */
isenglish, /* char frequency English */
isrtf, /* rich text format */
ismsdos, /* msdos exe (virus file attachment) */
isface, /* olde-tyme face format */
0
};
int mime; /* set by -m: print MIME types instead of descriptions */
int hflag; /* set by -h: filetype() prints its histogram totals on fd 2 */
/* MIME answers shared by several classifiers; both end in a newline */
#define OCTET "application/octet-stream\n"
#define PLAIN "text/plain\n"
/*
 * Parse the -m and -h flags, then classify every named file,
 * or standard input when no file is named.
 */
void
main(int argc, char *argv[])
{
	int arg, width, widest;
	char *s;
	Rune rune;

	ARGBEGIN{
	case 'h':
		hflag = 1;
		break;
	case 'm':
		mime = 1;
		break;
	default:
		fprint(2, "usage: file [-mh] [file...]\n");
		exits("usage");
	}ARGEND;

	/*
	 * find the widest name, counted in runes, so type() can align
	 * the answers; skipped for a single file in mime mode, where
	 * no name is printed
	 */
	widest = 0;
	if(!mime || argc > 1)
		for(arg = 0; arg < argc; arg++){
			width = 0;
			s = argv[arg];
			while(*s != '\0'){
				s += chartorune(&rune, s);
				width++;
			}
			if(width > widest)
				widest = width;
		}

	if(argc > 0){
		for(arg = 0; arg < argc; arg++)
			type(argv[arg], widest);
		exits(0);
	}

	/* no arguments: classify standard input */
	if(!mime)
		print ("stdin: ");
	filetype(0);
	exits(0);
}
/*
 * Print the name of file, padded to nlen runes when nlen > 0,
 * then open the file and hand it to filetype().
 * Records the name in fname and its rightmost '/' in slash.
 */
void
type(char *file, int nlen)
{
	Rune rune;
	int nrunes;
	char *s;

	if(nlen > 0){
		slash = 0;
		nrunes = 0;
		s = file;
		while(*s != '\0'){
			if(*s == '/')	/* remember the rightmost slash */
				slash = s;
			s += chartorune(&rune, s);
			nrunes++;	/* width is counted in runes */
		}
		print("%s:%*s",file, nlen-nrunes+1, "");
	}
	fname = file;
	fd = open(file, OREAD);
	if(fd < 0){
		print("cannot open\n");
		return;
	}
	filetype(fd);
	close(fd);
}
/*
 * Return the encoded length (2, 3 or 4) of the multi-byte UTF-8
 * sequence starting at s, or -1 if s does not start one.
 * e points just past the last valid byte.
 *
 * Only a proper lead byte (110xxxxx, 1110xxxx, 11110xxx) starts a
 * sequence, and every following byte that lies before e must be a
 * continuation byte (10xxxxxx).  A sequence cut short by e is still
 * accepted and its full length returned, because the caller's buffer
 * may end in the middle of a character.
 */
static int
utf8len(char *s, char *e)
{
	int c, n, i;

	c = *(unsigned char*)s++;
	if((c&0xe0) == 0xc0)
		n = 2;
	else if((c&0xf0) == 0xe0)
		n = 3;
	else if((c&0xf8) == 0xf0)
		n = 4;
	else
		return -1;

	/* check no more continuation bytes than are actually present */
	i = n-1;
	if(e-s < i)
		i = e-s;
	while(i-- > 0){
		c = *(unsigned char*)s++;
		if((c&0xc0) != 0x80)
			return -1;
	}
	return n;
}
/*
 * Classify the file open on fd and print one line describing it.
 *
 * Stats the file, reads its first sizeof(buf)-1 bytes into buf,
 * builds the character histogram cfreq[] and the per-script counts
 * in language[], derives the gross guess, counts dictionary words,
 * and then tries each routine in call[] until one claims the file.
 * If none does, the gross classification is printed.
 *
 * Note that the parameter shadows the global fd; the classify
 * routines use the global.
 */
void
filetype(int fd)
{
	Rune r;
	int i, f, n;
	char *p, *eob;
	uchar c;

	free(mbuf);
	mbuf = dirfstat(fd);
	if(mbuf == nil){
		print("cannot stat: %r\n");
		return;
	}
	if(mbuf->mode & DMDIR){
		print(mime ? "text/directory\n" : "directory\n");
		return;
	}
	if(mbuf->type != 'M' && mbuf->type != '|'){
		print(mime ? OCTET : "special file #%c/%s\n",
			mbuf->type, mbuf->name);
		return;
	}
	nbuf = read(fd, buf, sizeof(buf)-1);
	if(nbuf < 0){
		print("cannot read\n");
		return;
	}
	if(nbuf == 0){
		print(mime ? PLAIN : "empty file\n");
		return;
	}
	/*
	 * Clear everything past the data just read, not only buf[nbuf]:
	 * several classifiers look at fixed offsets without consulting
	 * nbuf and must not see bytes left over from a previous file.
	 */
	memset(buf+nbuf, 0, sizeof(buf)-nbuf);

	/*
	 * build histogram table
	 */
	memset(cfreq, 0, sizeof(cfreq));
	for(i = 0; language[i].name; i++)
		language[i].count = 0;
	eob = (char *)buf+nbuf;
	for(n = 0, p = (char *)buf; p < eob; n++){
		c = *(uchar*)p;
		if(c < 0x80){
			if(c == 0)
				f = Cnull;
			else if(!isprint(c) && !isspace(c))
				f = Ceascii;	/* ASCII control char */
			else
				f = c;
		}else if((i = utf8len(p, eob)) > 0){
			/* a multi-byte character counts once; skip its tail */
			chartorune(&r, p);
			p += i-1;
			bump_utf_count(r);
			f = Cutf;
		}else if(c <= 0xa0)
			f = Cbinary;
		else
			f = Clatin;
		cfreq[f]++;		/* ASCII chars peg directly */
		p++;
	}
	if(hflag)
		fprint(2, "n = %d, bin = %d, utf = %d, latin = %d, eascii = %d null = %d\n",
			n, cfreq[Cbinary], cfreq[Cutf], cfreq[Clatin], cfreq[Ceascii], cfreq[Cnull]);

	/*
	 * gross classify
	 */
	if(cfreq[Cbinary])
		guess = Fbinary;
	else if(cfreq[Cutf])
		guess = Futf;
	else if(cfreq[Clatin])
		guess = Flatin;
	else if(cfreq[Ceascii])
		guess = Feascii;
	else if(cfreq[Cnull] == n){
		print(mime ? OCTET : "first block all null bytes\n");
		return;
	}else
		guess = Fascii;

	/*
	 * lookup dictionary words
	 */
	memset(wfreq, 0, sizeof(wfreq));
	if(guess == Fascii || guess == Flatin || guess == Futf)
		wordfreq();

	/*
	 * call individual classify routines
	 */
	for(i = 0; call[i]; i++)
		if((*call[i])())
			return;

	/*
	 * if all else fails,
	 * print out gross classification
	 */
	if(nbuf < 100 && !mime)
		print("short ");
	if(guess == Fascii)
		print(mime ? PLAIN : "Ascii\n");
	else if(guess == Feascii)
		print(mime ? PLAIN : "extended ascii\n");
	else if(guess == Flatin)
		print(mime ? PLAIN : "latin1\n");
	else if(guess == Futf && utf_count() < 4)
		print_utf();
	else
		print(mime ? OCTET : "binary\n");
}
/*
 * Credit rune r to the language[] entry whose range contains it.
 * The table is binary-searched on [low, high]; the terminal entry
 * is excluded.  Runes outside every range are ignored.
 */
void
bump_utf_count(Rune r)
{
	int lo, hi, probe;

	lo = 0;
	hi = sizeof(language)/sizeof(language[0])-1;
	while(lo < hi){
		probe = (lo+hi)/2;
		if(r < language[probe].low){
			hi = probe;
			continue;
		}
		if(r > language[probe].high){
			lo = probe+1;
			continue;
		}
		language[probe].count++;
		return;
	}
}
/*
 * Return the number of distinct languages seen in the file.
 * Only Normal and First entries count, so a language that spans
 * several ranges is counted through its First entry alone and
 * Shared ranges are never counted.
 */
int
utf_count(void)
{
	int k, nlang;

	nlang = 0;
	for(k = 0; language[k].name != 0; k++){
		if(language[k].count <= 0)
			continue;
		if(language[k].mode == Normal || language[k].mode == First)
			nlang++;
	}
	return nlang;
}
/*
 * Return 1 if the histogram recorded any ASCII letter, else 0.
 * Both ranges are inclusive, so 'z' and 'Z' are examined too.
 */
int
chkascii(void)
{
	int i;

	for(i = 'a'; i <= 'z'; i++)
		if(cfreq[i])
			return 1;
	for(i = 'A'; i <= 'Z'; i++)
		if(cfreq[i])
			return 1;
	return 0;
}
/*
 * Return the index of the language[] entry with mode First whose
 * name equals name, or -1 if there is none.
 */
int
find_first(char *name)
{
	int k;

	for(k = 0; language[k].name != 0; k++){
		if(language[k].mode != First)
			continue;
		if(strcmp(language[k].name, name) == 0)
			return k;
	}
	return -1;
}
void
print_utf(void)
{
int i, printed, j;
if(mime){
print(PLAIN);
return;
}
if (chkascii()) {
printed = 1;
print("Ascii");
} else
printed = 0;
for (i = 0; language[i].name; i++)
if (language[i].count) {
switch(language[i].mode) {
case Multi:
j = find_first(language[i].name);
if (j < 0)
break;
if (language[j].count > 0)
break;
/* Fall through */
case Normal:
case First:
if (printed)
print(" & ");
else printed = 1;
print("%s", language[i].name);
break;
case Shared:
default:
break;
}
}
if(!printed)
print("UTF");
print(" text\n");
}
void
wordfreq(void)
{
int low, high, mid, r;
uchar *p, *p2, c;
p = buf;
for(;;) {
while (p < buf+nbuf && !isalpha(*p))
p++;
if (p >= buf+nbuf)
return;
p2 = p;
while(p < buf+nbuf && isalpha(*p))
p++;
c = *p;
*p = 0;
high = sizeof(dict)/sizeof(dict[0]);
for(low = 0;low < high;) {
mid = (low+high)/2;
r = strcmp(dict[mid].word, (char*)p2);
if(r == 0) {
wfreq[dict[mid].class]++;
break;
}
if(r < 0)
low = mid+1;
else
high = mid;
}
*p++ = c;
}
}
/*
 * A magic number test: a file matches when its leading little-endian
 * word, taken under mask, equals x.  desc is the description and mime
 * the MIME type printed for a match; both are printed as they stand,
 * so both must end in a newline.
 */
typedef struct Filemagic Filemagic;
struct Filemagic {
	ulong x;
	ulong mask;
	char *desc;
	char *mime;
};

/* magic numbers recognized by long0() */
Filemagic long0tab[] = {
	0xF16DF16D,	0xFFFFFFFF,	"pac1 audio file\n",	OCTET,
	0x31636170,	0xFFFFFFFF,	"pac3 audio file\n",	OCTET,
	0x32636170,	0xFFFF00FF,	"pac4 audio file\n",	OCTET,
	0xBA010000,	0xFFFFFFFF,	"mpeg system stream\n",	OCTET,
	0x30800CC0,	0xFFFFFFFF,	"inferno .dis executable\n", OCTET,
	0x04034B50,	0xFFFFFFFF,	"zip archive\n",	"application/zip\n",
	070707,		0xFFFF,		"cpio archive\n",	OCTET,
	0x2F7,		0xFFFF,		"tex dvi\n",		"application/dvi\n",
	0xfaff,		0xfeff,		"mp3 audio\n",		"audio/mpeg\n",
};
/*
 * Look x up in the ntab entries of tab.  An entry matches when x and
 * the entry's value agree in every bit selected by the entry's mask;
 * masking both sides lets an entry whose value has bits set outside
 * its mask still match.
 * On a match, print the entry's MIME type or description and return 1;
 * return 0 if nothing matches.
 */
int
filemagic(Filemagic *tab, int ntab, ulong x)
{
	int i;

	for(i = 0; i < ntab; i++)
		if(((x ^ tab[i].x) & tab[i].mask) == 0){
			/* table text is data, not a format */
			print("%s", mime ? tab[i].mime : tab[i].desc);
			return 1;
		}
	return 0;
}
int
long0(void)
{
Fhdr f;
long x;
seek(fd, 0, 0); /* reposition to start of file */
if(crackhdr(fd, &f)) {
print(mime ? OCTET : "%s\n", f.name);
return 1;
}
x = LENDIAN(buf);
if(filemagic(long0tab, nelem(long0tab), x))
return 1;
return 0;
}
/* from tar.c */
enum { NAMSIZ = 100, TBLOCK = 512 };

/* layout of a tar header; every field is a character array */
struct header
{
	char	name[NAMSIZ];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	chksum[8];
	char	linkflag;
	char	linkname[NAMSIZ];

	/* rest are defined by POSIX's ustar format; see p1003.2b */
	char	magic[6];	/* "ustar" */
	char	version[2];
	char	uname[32];
	char	gname[32];
	char	devmajor[8];
	char	devminor[8];
	char	prefix[155];	/* if non-null, path = prefix "/" name */
};

/* one tar block, viewed either as raw bytes or as a header */
union hblock
{
	char		dummy[TBLOCK];
	struct header	dbuf;
};
/*
 * Compute the tar checksum of the header block hp: the sum of all
 * TBLOCK bytes, each taken as unsigned, with the checksum field
 * itself counted as blanks.  The checksum field of hp is overwritten
 * with blanks in the process.
 */
int
checksum(union hblock *hp)
{
	int sum, k;

	memset(hp->dbuf.chksum, ' ', sizeof hp->dbuf.chksum);
	sum = 0;
	for(k = 0; k < TBLOCK; k++)
		sum += hp->dummy[k] & 0xff;
	return sum;
}
int
istar(void)
{
int chksum;
char tblock[TBLOCK];
union hblock *hp = (union hblock *)tblock;
struct header *hdr = &hp->dbuf;
seek(fd, 0, 0); /* reposition to start of file */
if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
return 0;
chksum = strtol(hdr->chksum, 0, 8);
if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
if (strcmp(hdr->magic, "ustar") == 0)
print(mime? "application/x-ustar\n":
"posix tar archive\n");
else
print(mime? "application/x-tar\n": "tar archive\n");
return 1;
}
return 0;
}
/*
 * initial words to classify file
 *
 * istring() compares the first length bytes of the file with key and
 * reports the first entry that matches, so a longer key must precede
 * any shorter key that is its prefix.  length is given explicitly
 * because keys may contain NUL bytes.
 */
struct	FILE_STRING
{
	char	*key;		/* bytes expected at the start of the file */
	char	*filetype;	/* description printed normally */
	int	length;		/* number of key bytes to compare */
	char	*mime;		/* type printed in mime mode */
} file_string[] =
{
	{ "!<arch>\n__.SYMDEF",	"archive random library",	16,	"application/octet-stream" },
	{ "!<arch>\n",		"archive",			8,	"application/octet-stream" },
	{ "070707",		"cpio archive - ascii header",	6,	"application/octet-stream" },
	{ "#!/bin/rc",		"rc executable file",		9,	"text/plain" },
	{ "#!/bin/sh",		"sh executable file",		9,	"text/plain" },
	{ "%!",			"postscript",			2,	"application/postscript" },
	{ "\004%!",		"postscript",			3,	"application/postscript" },
	{ "x T post",		"troff output for post",	8,	"application/troff" },
	{ "x T Latin1",		"troff output for Latin1",	10,	"application/troff" },
	{ "x T utf",		"troff output for UTF",		7,	"application/troff" },
	{ "x T 202",		"troff output for 202",		7,	"application/troff" },
	{ "x T aps",		"troff output for aps",		7,	"application/troff" },
	{ "GIF",		"GIF image",			3,	"image/gif" },
	{ "\0PC Research, Inc\0", "ghostscript fax file",	18,	"application/ghostscript" },
	{ "%PDF",		"PDF",				4,	"application/pdf" },
	{ "<html>\n",		"HTML file",			7,	"text/html" },
	{ "<HTML>\n",		"HTML file",			7,	"text/html" },
	{ "compressed\n",	"Compressed image or subfont",	11,	"application/octet-stream" },
	{ "\111\111\052\000",	"tiff",				4,	"image/tiff" },
	{ "\115\115\000\052",	"tiff",				4,	"image/tiff" },
	{ "\377\330\377\340",	"jpeg",				4,	"image/jpeg" },
	{ "\377\330\377\341",	"jpeg",				4,	"image/jpeg" },
	{ "\377\330\377\333",	"jpeg",				4,	"image/jpeg" },
	{ "BM",			"bmp",				2,	"image/bmp" },
	{ "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream" },
	{ "<MakerFile ",	"FrameMaker file",		11,	"application/framemaker" },
	{ "\033%-12345X",	"HPJCL file",			9,	"application/hpjcl" },
	{ "ID3",		"mp3 audio with id3",		3,	"audio/mpeg" },
	{ "\211PNG",		"PNG image",			4,	"image/png" },
	{ "P3\n",		"ppm",				3,	"image/ppm" },
	{ "P6\n",		"ppm",				3,	"image/ppm" },
	/* this signature is the X pixmap header, not an X bitmap */
	{ "/* XPM */\n",	"xpm",				10,	"image/x-xpixmap" },
	{ 0,0,0,0 }
};
/*
 * Recognize a file by the string it starts with: first the entries
 * of file_string, then a "TYPE=" line, whose value up to the first
 * newline is reported as a picture type.
 */
int
istring(void)
{
	struct FILE_STRING *fs;
	char *label;
	int end;

	for(fs = file_string; fs->key != 0; fs++){
		if(nbuf < fs->length || memcmp(buf, fs->key, fs->length) != 0)
			continue;
		print("%s\n", mime ? fs->mime : fs->filetype);
		return 1;
	}

	if(strncmp((char*)buf, "TYPE=", 5) != 0)
		return 0;

	/* td */
	label = (char*)buf+5;
	end = 5;
	while(end < nbuf && buf[end] != '\n')
		end++;
	if(mime)
		print(OCTET);
	else
		print("%.*s picture\n", utfnlen(label, end-5), label);
	return 1;
}
/*
 * Recognize an interchange-format audio file: "FORM" at offset 0
 * and "AIFF" at offset 8.
 */
int
iff(void)
{
	char *start;

	start = (char*)buf;
	if(strncmp(start, "FORM", 4) != 0)
		return 0;
	if(strncmp(start+8, "AIFF", 4) != 0)
		return 0;
	print("%s\n", mime? "audio/x-aiff": "aiff audio");
	return 1;
}
/*
 * Tag names that ishtml() looks for between '<' and '>'.
 * The comparison is case-insensitive; the list ends with a nil entry.
 */
char* html_string[] =
{
"title",
"body",
"head",
"strong",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"ul",
"li",
"dl",
"br",
"em",
0,
};
/*
 * Recognize HTML by its tags: scan buf for text enclosed in '<' and
 * '>' (an optional leading '/' is skipped) and compare it with the
 * names in html_string.  The file is HTML once a match is found
 * after five earlier ones.
 */
int
ishtml(void)
{
	uchar *s, *tag, *end;
	int k, hits;

	end = buf+nbuf;
	hits = 0;
	/* the s++ of the loop steps over the '>' that closed a tag */
	for(s = buf;; s++){
		while(s < end && *s != '<')
			s++;
		s++;
		if(s >= end)
			break;
		if(*s == '/')
			s++;
		tag = s;
		while(s < end && *s != '>')
			s++;
		if(s >= end)
			break;
		for(k = 0; html_string[k] != 0; k++){
			if(cistrncmp(html_string[k], (char*)tag, s-tag) != 0)
				continue;
			if(hits++ > 4){
				print(mime ? "text/html\n" : "HTML file\n");
				return 1;
			}
			break;
		}
	}
	return 0;
}
/*
 * Header field names that isrfc822() counts, matched without regard
 * to case at the start of a line.  Field names cannot contain blanks,
 * so the reply address field is spelled "reply-to:".
 */
char* rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",
	"sender:",
	0,
};
int
isrfc822(void)
{
char *p, *q, *r;
int i, count;
count = 0;
p = (char*)buf;
for(;;) {
q = strchr(p, '\n');
if(q == nil)
break;
*q = 0;
if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
count++;
*q = '\n';
p = q+1;
continue;
}
*q = '\n';
if(*p != '\t' && *p != ' '){
r = strchr(p, ':');
if(r == 0 || r > q)
break;
for(i = 0; rfc822_string[i]; i++) {
if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
count++;
break;
}
}
}
p = q+1;
}
if(count >= 3){
print(mime ? "message/rfc822\n" : "email file\n");
return 1;
}
return 0;
}
int
ismbox(void)
{
char *p, *q;
p = (char*)buf;
q = strchr(p, '\n');
if(q == nil)
return 0;
*q = 0;
if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
print(mime ? "text/plain\n" : "mail box\n");
return 1;
}
*q = '\n';
return 0;
}
/*
 * Recognize a compiler or assembler intermediate file by asking
 * objtype() to identify it from the start of the file.
 */
int
iscint(void)
{
	Biobuf bio;
	char *objname;

	if(Binit(&bio, fd, OREAD) == Beof)
		return 0;
	seek(fd, 0, 0);
	if(objtype(&bio, &objname) < 0)
		return 0;
	print(mime ? OCTET : "%s intermediate\n", objname);
	return 1;
}
/*
 * Recognize C or alef source from the word and character counts.
 * Any one of these is enough:
 *	at least two "include"s, each matched by a header name, an "h" and a '.';
 *	an "include" matched by alef words, an "h" and a '.';
 *	five C keywords and five ';' (declarations);
 *	ten ';', ten '=' and one C keyword (assignments).
 * The program is called alef if any alef word was seen.
 */
int
isc(void)
{
	int inc, looks;

	inc = wfreq[I1];
	looks = (inc >= 2 && wfreq[I2] >= inc && wfreq[I3] >= inc && cfreq['.'] >= inc)
		|| (inc >= 1 && wfreq[Alword] >= inc && wfreq[I3] >= inc && cfreq['.'] >= inc)
		|| (wfreq[Cword] >= 5 && cfreq[';'] >= 5)
		|| (cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1);
	if(!looks)
		return 0;

	if(mime)
		print(PLAIN);
	else if(wfreq[Alword] > 0)
		print("alef program\n");
	else
		print("c program\n");
	return 1;
}
/*
 * Recognize limbo source: at least four limbo dictionary words.
 */
int
islimbo(void)
{
	if(wfreq[Lword] >= 4){
		print(mime ? PLAIN : "limbo program\n");
		return 1;
	}
	return 0;
}
/*
 * Recognize assembler source: at least two assembler dictionary words.
 */
int
isas(void)
{
	if(wfreq[Aword] >= 2){
		print(mime ? PLAIN : "as program\n");
		return 1;
	}
	return 0;
}
/*
* low entropy means encrypted
*/
int
ismung(void)
{
int i, bucket[8];
float cs;
if(nbuf < 64)
return 0;
memset(bucket, 0, sizeof(bucket));
for(i=nbuf-64; i<nbuf; i++)
bucket[(buf[i]>>5)&07] += 1;
cs = 0.;
for(i=0; i<8; i++)
cs += (bucket[i]-8)*(bucket[i]-8);
cs /= 8.;
if(cs <= 24.322) {
if(buf[0]==0x1f && buf[1]==0x9d)
print(mime ? OCTET : "compressed\n");
else
if(buf[0]==0x1f && buf[1]==0x8b)
print(mime ? OCTET : "gzip compressed\n");
else
if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
print(mime ? OCTET : "bzip2 compressed\n");
else
print(mime ? OCTET : "encrypted\n");
return 1;
}
return 0;
}
/*
* english by punctuation and frequencies
*/
int
isenglish(void)
{
int vow, comm, rare, badpun, punct;
char *p;
if(guess != Fascii && guess != Feascii)
return 0;
badpun = 0;
punct = 0;
for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
switch(*p) {
case '.':
case ',':
case ')':
case '%':
case ';':
case ':':
case '?':
punct++;
if(p[1] != ' ' && p[1] != '\n')
badpun++;
}
if(badpun*5 > punct)
return 0;
if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
return 0;
if(2*cfreq[';'] > cfreq['e'])
return 0;
vow = 0;
for(p="AEIOU"; *p; p++) {
vow += cfreq[*p];
vow += cfreq[tolower(*p)];
}
comm = 0;
for(p="ETAION"; *p; p++) {
comm += cfreq[*p];
comm += cfreq[tolower(*p)];
}
rare = 0;
for(p="VJKQXZ"; *p; p++) {
rare += cfreq[*p];
rare += cfreq[tolower(*p)];
}
if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
print(mime ? PLAIN : "English text\n");
return 1;
}
return 0;
}
/*
 * pick up a number with
 * syntax _*[0-9]+_
 *
 * The field at bp is P9BITLEN bytes wide: leading blanks, then
 * decimal digits filling the field up to its last byte, which must
 * be a blank.  Returns the value of the number, or -1 if the field
 * does not have this form.
 */
#define P9BITLEN 12
int
p9bitnum(uchar *bp)
{
	int val, ch, left;

	left = P9BITLEN;
	for(; *bp == ' '; bp++)
		if(--left <= 0)
			return -1;	/* nothing but blanks */
	val = 0;
	for(; left > 1; left--){
		ch = *bp++;
		if(!isdigit(ch))
			return -1;
		val = val*10 + ch-'0';
	}
	return *bp == ' ' ? val : -1;
}
/*
 * Decode the first 12-byte field of an image header and return the
 * number of bits per pixel, or -1 if the field is not a valid depth.
 *
 * A field that starts with a digit is an old-style header holding
 * the log2 of the depth; *newp is set to 0.  Values outside 0..5
 * (depths 1..32) are rejected, which also keeps the shift defined.
 * Otherwise the field is a channel descriptor made of letter/number
 * pairs such as r8g8b8; *newp is set to 1 and the numbers are summed.
 * Only total depths of 8, 16, 24 and 32 are accepted.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	long ld;
	int d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, 0, 0);
		if(ld < 0 || ld > 5)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		s++;			/* skip letter */
		d += strtoul(s, &s, 10);
	}

	switch(d){
	case 32:
	case 24:
	case 16:
	case 8:
		return d;
	}
	return -1;
}
int
isp9bit(void)
{
int dep, lox, loy, hix, hiy, px, new;
ulong t;
long len;
char *newlabel;
newlabel = "old ";
dep = depthof((char*)buf + 0*P9BITLEN, &new);
if(new)
newlabel = "";
lox = p9bitnum(buf + 1*P9BITLEN);
loy = p9bitnum(buf + 2*P9BITLEN);
hix = p9bitnum(buf + 3*P9BITLEN);
hiy = p9bitnum(buf + 4*P9BITLEN);
if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
return 0;
if(dep < 8){
px = 8/dep; /* pixels per byte */
/* set l to number of bytes of data per scan line */
if(lox >= 0)
len = (hix+px-1)/px - lox/px;
else{ /* make positive before divide */
t = (-lox)+px-1;
t = (t/px)*px;
len = (t+hix+px-1)/px;
}
}else
len = (hix-lox)*dep/8;
len *= (hiy-loy); /* col length */
len += 5*P9BITLEN; /* size of initial ascii */
/*
* for image file, length is non-zero and must match calculation above
* for /dev/window and /dev/screen the length is always zero
* for subfont, the subfont header should follow immediately.
*/
if (len != 0 && mbuf->length == 0) {
print("%splan 9 image\n", newlabel);
return 1;
}
if (mbuf->length == len) {
print("%splan 9 image\n", newlabel);
return 1;
}
/* Ghostscript sometimes produces a little extra on the end */
if (mbuf->length < len+P9BITLEN) {
print("%splan 9 image\n", newlabel);
return 1;
}
if (p9subfont(buf+len)) {
print("%ssubfont file\n", newlabel);
return 1;
}
return 0;
}
/*
 * Decide whether a subfont header starts at p: three numeric fields
 * of P9BITLEN bytes each (char count, height, ascent).
 * Returns 1 if all three parse and 0 otherwise.
 */
int
p9subfont(uchar *p)
{
	int field;

	/* if image too big, assume it's a subfont */
	if(p+3*P9BITLEN > buf+sizeof(buf))
		return 1;

	for(field = 0; field < 3; field++)
		if(p9bitnum(p + field*P9BITLEN) < 0)
			return 0;
	return 1;
}
#define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')

/*
 * Decide whether the NUL-terminated text at cp is a font file:
 * a height and an ascent, then one or more entries consisting of a
 * minimum, a maximum, an optional offset and a file name.  Every
 * file name that fits in the path buffer must exist; a name that
 * does not start with '/' is looked up in the directory of fname.
 * Prints the answer and returns 1 for a font, returns 0 otherwise.
 */
int
_isp9font(uchar *cp)
{
	uchar *name;
	int nranges, n;
	char pathname[1024];

	if(!getfontnum(cp, &cp))	/* height */
		return 0;
	if(!getfontnum(cp, &cp))	/* ascent */
		return 0;

	/* each pass starts by reading a minimum; stop when there is none */
	for(nranges = 0; getfontnum(cp, &cp); nranges++){
		if(!getfontnum(cp, &cp))	/* max */
			return 0;
		getfontnum(cp, &cp);		/* offset -- not required */
		while(WHITESPACE(*cp))
			cp++;
		name = cp;
		while(*cp != 0 && !WHITESPACE(*cp))
			cp++;

		/* construct a path name, if needed */
		n = 0;
		if(*name != '/' && slash != 0){
			n = slash-fname+1;
			if(n >= sizeof(pathname))
				n = 0;
			else
				memcpy(pathname, fname, n);
		}
		if(n+cp-name >= sizeof(pathname))
			continue;	/* too long to check */
		memcpy(pathname+n, name, cp-name);
		n += cp-name;
		pathname[n] = 0;
		if(access(pathname, AEXIST) < 0)
			return 0;
	}
	if(nranges == 0)
		return 0;
	print(mime ? "text/plain\n" : "font file\n");
	return 1;
}
// fonts can be longer than 6k
int
isp9font(void)
{
Dir *d;
char *b;
vlong l;
int n;
d = dirfstat(fd);
if(!d)
return 0;
l = d->length;
free(d);
n = sizeof(buf)-1;
if(l < n)
return _isp9font(buf);
b = malloc(l+1);
if(!b)
return 0;
memcpy(b, buf, n);
seek(fd, n, 0);
if(readn(fd, b + n, l-n) != l-n)
return 0;
b[l] = 0;
n = _isp9font((uchar*)b);
free(b);
return n;
}
/*
 * Skip white space at cp and parse the number that follows.
 * Returns 1 if a number was found and is followed by white space,
 * with *rp set just past it.  Returns 0 otherwise; *rp is untouched
 * when the text does not start with a digit, but has been advanced
 * past the digits when the number is not followed by white space.
 */
int
getfontnum(uchar *cp, uchar **rp)
{
	for(; WHITESPACE(*cp); cp++)
		;
	if(!('0' <= *cp && *cp <= '9'))
		return 0;
	strtoul((char *)cp, (char **)rp, 0);
	return WHITESPACE(**rp) ? 1 : 0;
}
int
isrtf(void)
{
if(strstr((char *)buf, "\\rtf1")){
print(mime ? "application/rtf\n" : "rich text format\n");
return 1;
}
return 0;
}
/*
 * Recognize an MS-DOS executable by its leading "MZ".
 */
int
ismsdos(void)
{
	if(buf[0] != 'M' || buf[1] != 'Z')
		return 0;
	print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
	return 1;
}
/*
 * Recognize an ELF file by its magic number and name the machine
 * found in bytes 18 and 19 of the header.  The machine number is
 * tried little-endian first and big-endian second; "unknown" is
 * printed when neither is in the table.
 */
int
iself(void)
{
	char *cpu[] = {		/* NB: incomplete and arbitrary list */
	[1]	"WE32100",
	[2]	"SPARC",
	[3]	"i386",
	[4]	"M68000",
	[5]	"M88000",
	[6]	"i486",
	[7]	"i860",
	[8]	"R3000",
	[9]	"S370",
	[10]	"R4000",
	[15]	"HP-PA",
	[18]	"sparc v8+",
	[19]	"i960",
	[20]	"PPC-32",
	[21]	"PPC-64",
	[40]	"ARM",
	[41]	"Alpha",
	[43]	"sparc v9",
	[50]	"IA-64",
	[62]	"AMD64",
	[75]	"VAX",
	};
	char *p;
	int n;

	/* the literal is split so the hex escape cannot swallow the 'E' */
	if(memcmp(buf, "\x7f" "ELF", 4) != 0)
		return 0;
	if(mime){
		print("application/x-elf-executable\n");
		return 1;
	}

	p = "unknown";
	n = (buf[19] << 8) | buf[18];
	if(n > 0 && n < nelem(cpu) && cpu[n])
		p = cpu[n];
	else{
		/* try the other byte order */
		n = (buf[18] << 8) | buf[19];
		if(n > 0 && n < nelem(cpu) && cpu[n])
			p = cpu[n];
	}
	print("%s ELF executable\n", p);
	return 1;
}
/*
 * Recognize the old face format: lines of three hexadecimal numbers,
 * each starting with 0x and followed by a comma, with nothing but
 * blanks or tabs between them.  The first three lines are checked.
 * Every number must have the same width: four digits give depth 1
 * and eight digits give depth 2.
 *
 * The scan pointer runs on from one line to the next and the width
 * of each number is measured at that number.
 */
int
isface(void)
{
	int i, j, ldepth, l;
	char *p;

	ldepth = -1;
	p = (char*)buf;
	for(j = 0; j < 3; j++){
		for(i = 0; i < 3; i++){
			if(p[0] != '0' || p[1] != 'x')
				return 0;
			if(p[2+8] == ',')
				l = 2;
			else if(p[2+4] == ',')
				l = 1;
			else
				return 0;
			if(ldepth == -1)
				ldepth = l;
			if(l != ldepth)
				return 0;
			strtoul(p, &p, 16);
			if(*p++ != ',')
				return 0;
			while(*p == ' ' || *p == '\t')
				p++;
		}
		if(*p++ != '\n')
			return 0;
	}

	if(mime)
		print("application/x-face\n");
	else
		print("face image depth %d\n", ldepth);
	return 1;
}
|