Plan 9 from Bell Labs’s /usr/web/sources/plan9/sys/src/cmd/join.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


/*	join F1 F2 on stuff */
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>

enum {
	F1,
	F2,
	NIN,
	F0,
};

#define	NFLD	100	/* max field per line */
#define comp() runestrcmp(ppi[F1][j1], ppi[F2][j2])

Biobuf *f[NIN];
Rune buf[NIN][Bsize];	/* input lines */
Rune *ppi[NIN][NFLD+1];	/* pointers to fields in lines */
Rune	sep1	= ' ';	/* default field separator */
Rune	sep2	= '\t';
int	j1	= 1;	/* join of this field of file 1 */
int	j2	= 1;	/* join of this field of file 2 */
int	a1;
int 	a2;

int	olist[NIN*NFLD];  /* output these fields */
int	olistf[NIN*NFLD]; /* from these files */
int	no;		/* number of entries in olist */
char *sepstr	= " ";
int	discard;	/* count of truncated lines */
Rune	null[Bsize]	= L"";
Biobuf binbuf, boutbuf;
Biobuf *bin, *bout;

char	*getoptarg(int*, char***);
int	input(int);
void	join(int);
void	oparse(char*);
void	output(int, int);
Rune	*strtorune(Rune *, char *);

void
main(int argc, char **argv)
{
	int i;
	vlong off1, off2;

	bin = &binbuf;
	bout = &boutbuf;
	Binit(bin, 0, OREAD);
	Binit(bout, 1, OWRITE);

	argv0 = argv[0];
	while (argc > 1 && argv[1][0] == '-') {
		if (argv[1][1] == '\0')
			break;
		switch (argv[1][1]) {
		case '-':
			argc--;
			argv++;
			goto proceed;
		case 'a':
			switch(*getoptarg(&argc, &argv)) {
			case '1':
				a1++;
				break;
			case '2':
				a2++;
				break;
			default:
				sysfatal("incomplete option -a");
			}
			break;
		case 'e':
			strtorune(null, getoptarg(&argc, &argv));
			break;
		case 't':
			sepstr=getoptarg(&argc, &argv);
			chartorune(&sep1, sepstr);
			sep2 = sep1;
			break;
		case 'o':
			if(argv[1][2]!=0 ||
			   argc>2 && strchr(argv[2],',')!=0)
				oparse(getoptarg(&argc, &argv));
			else for (no = 0; no<2*NFLD && argc>2; no++){
				if (argv[2][0] == '1' && argv[2][1] == '.') {
					olistf[no] = F1;
					olist[no] = atoi(&argv[2][2]);
				} else if (argv[2][0] == '2' && argv[2][1] == '.') {
					olist[no] = atoi(&argv[2][2]);
					olistf[no] = F2;
				} else if (argv[2][0] == '0')
					olistf[no] = F0;
				else
					break;
				argc--;
				argv++;
			}
			break;
		case 'j':
			if(argc <= 2)
				break;
			if (argv[1][2] == '1')
				j1 = atoi(argv[2]);
			else if (argv[1][2] == '2')
				j2 = atoi(argv[2]);
			else
				j1 = j2 = atoi(argv[2]);
			argc--;
			argv++;
			break;
		case '1':
			j1 = atoi(getoptarg(&argc, &argv));
			break;
		case '2':
			j2 = atoi(getoptarg(&argc, &argv));
			break;
		}
		argc--;
		argv++;
	}
proceed:
	for (i = 0; i < no; i++)
		if (olist[i]-- > NFLD)	/* 0 origin */
			sysfatal("field number too big in -o");
	if (argc != 3) {
		fprint(2, "usage: join [-1 x -2 y] [-o list] file1 file2\n");
		exits("usage");
	}
	if (j1 < 1  || j2 < 1)
		sysfatal("invalid field indices");
	j1--;
	j2--;	/* everyone else believes in 0 origin */

	if (strcmp(argv[1], "-") == 0)
		f[F1] = bin;
	else if ((f[F1] = Bopen(argv[1], OREAD)) == 0)
		sysfatal("can't open %s: %r", argv[1]);
	if(strcmp(argv[2], "-") == 0)
		f[F2] = bin;
	else if ((f[F2] = Bopen(argv[2], OREAD)) == 0)
		sysfatal("can't open %s: %r", argv[2]);

	off1 = Boffset(f[F1]);
	off2 = Boffset(f[F2]);
	if(Bseek(f[F2], 0, 2) >= 0){
		Bseek(f[F2], off2, 0);
		join(F2);
	}else if(Bseek(f[F1], 0, 2) >= 0){
		Bseek(f[F1], off1, 0);
		Bseek(f[F2], off2, 0);
		join(F1);
	}else
		sysfatal("neither file is randomly accessible");
	if (discard)
		sysfatal("some input line was truncated");
	exits("");
}

char *
runetostr(char *buf, Rune *r)
{
	char *s;

	for(s = buf; *r; r++)
		s += runetochar(s, r);
	*s = '\0';
	return buf;
}

Rune *
strtorune(Rune *buf, char *s)
{
	Rune *r;

	for (r = buf; *s; r++)
		s += chartorune(r, s);
	*r = '\0';
	return buf;
}

void
readboth(int n[])
{
	n[F1] = input(F1);
	n[F2] = input(F2);
}

void
seekbotreadboth(int seekf, vlong bot, int n[])
{
	Bseek(f[seekf], bot, 0);
	readboth(n);
}

void
join(int seekf)
{
	int cmp, less;
	int n[NIN];
	vlong top, bot;

	less = seekf == F2;
	top = 0;
	bot = Boffset(f[seekf]);
	readboth(n);
	while(n[F1]>0 && n[F2]>0 || (a1||a2) && n[F1]+n[F2]>0) {
		cmp = comp();
		if(n[F1]>0 && n[F2]>0 && cmp>0 || n[F1]==0) {
			if(a2)
				output(0, n[F2]);
			if (seekf == F2)
				bot = Boffset(f[seekf]);
			n[F2] = input(F2);
		} else if(n[F1]>0 && n[F2]>0 && cmp<0 || n[F2]==0) {
			if(a1)
				output(n[F1], 0);
			if (seekf == F1)
				bot = Boffset(f[seekf]);
			n[F1] = input(F1);
		} else {
			/* n[F1]>0 && n[F2]>0 && cmp==0 */
			while(n[F2]>0 && cmp==0) {
				output(n[F1], n[F2]);
				top = Boffset(f[seekf]);
				n[seekf] = input(seekf);
				cmp = comp();
			}
			seekbotreadboth(seekf, bot, n);
			for(;;) {
				cmp = comp();
				if(n[F1]>0 && n[F2]>0 && cmp==0) {
					output(n[F1], n[F2]);
					n[seekf] = input(seekf);
				} else if(n[F1]>0 && n[F2]>0 &&
				    (less? cmp<0 :cmp>0) || n[seekf]==0)
					seekbotreadboth(seekf, bot, n);
				else {
					/*
					 * n[F1]>0 && n[F2]>0 &&
					 * (less? cmp>0 :cmp<0) ||
					 * n[seekf==F1? F2: F1]==0
					 */
					Bseek(f[seekf], top, 0);
					bot = top;
					n[seekf] = input(seekf);
					break;
				}
			}
		}
	}
}

int
input(int n)		/* get input line and split into fields */
{
	int c, i, len;
	char *line;
	Rune *bp;
	Rune **pp;

	bp = buf[n];
	pp = ppi[n];
	line = Brdline(f[n], '\n');
	if (line == nil)
		return(0);
	len = Blinelen(f[n]) - 1;
	c = line[len];
	line[len] = '\0';
	strtorune(bp, line);
	line[len] = c;			/* restore delimiter */
	if (c != '\n')
		discard++;

	i = 0;
	do {
		i++;
		if (sep1 == ' ')	/* strip multiples */
			while ((c = *bp) == sep1 || c == sep2)
				bp++;	/* skip blanks */
		*pp++ = bp;		/* record beginning */
		while ((c = *bp) != sep1 && c != sep2 && c != '\0')
			bp++;
		*bp++ = '\0';		/* mark end by overwriting blank */
	} while (c != '\0' && i < NFLD-1);

	*pp = 0;
	return(i);
}

void
prfields(int f, int on, int jn)
{
	int i;
	char buf[Bsize];

	for (i = 0; i < on; i++)
		if (i != jn)
			Bprint(bout, "%s%s", sepstr, runetostr(buf, ppi[f][i]));
}

void
output(int on1, int on2)	/* print items from olist */
{
	int i;
	Rune *temp;
	char buf[Bsize];

	if (no <= 0) {	/* default case */
		Bprint(bout, "%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
		prfields(F1, on1, j1);
		prfields(F2, on2, j2);
		Bputc(bout, '\n');
	} else {
		for (i = 0; i < no; i++) {
			if (olistf[i]==F0 && on1>j1)
				temp = ppi[F1][j1];
			else if (olistf[i]==F0 && on2>j2)
				temp = ppi[F2][j2];
			else {
				temp = ppi[olistf[i]][olist[i]];
				if(olistf[i]==F1 && on1<=olist[i] ||
				   olistf[i]==F2 && on2<=olist[i] ||
				   *temp==0)
					temp = null;
			}
			Bprint(bout, "%s", runetostr(buf, temp));
			if (i == no - 1)
				Bputc(bout, '\n');
			else
				Bprint(bout, "%s", sepstr);
		}
	}
}

char *
getoptarg(int *argcp, char ***argvp)
{
	int argc = *argcp;
	char **argv = *argvp;
	if(argv[1][2] != 0)
		return &argv[1][2];
	if(argc<=2 || argv[2][0]=='-')
		sysfatal("incomplete option %s", argv[1]);
	*argcp = argc-1;
	*argvp = ++argv;
	return argv[1];
}

void
oparse(char *s)
{
	for (no = 0; no<2*NFLD && *s; no++, s++) {
		switch(*s) {
		case 0:
			return;
		case '0':
			olistf[no] = F0;
			break;
		case '1':
		case '2':
			if(s[1] == '.' && isdigit(s[2])) {
				olistf[no] = *s=='1'? F1: F2;
				olist[no] = atoi(s += 2);
				break;
			}
			/* fall thru */
		default:
			sysfatal("invalid -o list");
		}
		if(s[1] == ',')
			s++;
	}
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].