/*
 * Plan 9 from Bell Labs’s /usr/web/sources/extra/i/url.c
 *
 * Copyright © 2021 Plan 9 Foundation.
 * Distributed under the MIT License.
 * Download the Plan 9 distribution.
 */


#include "i.h"

// function forward declarations
// split: like splitl, but drops the one-rune separator from the second part.
static void			split(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
// urlfromparts: build a new ParsedUrl whose parts live in one contiguous string.
static ParsedUrl*	urlfromparts(ParsedUrl* p);
// canonize: join s1[0:n1] and s2 as a path and remove . and .. elements.
static Rune*		canonize(Rune* s1,  int n1, Rune* s2);
// Ufmt: not static; urlinit installs it as the handler for the 'U' format verb.
	int			Ufmt(Fmt *f);

// globals

// Scheme names, indexed by scheme code: makeurl stores the index of
// the matching entry as the scheme, and urlfromparts looks the name
// back up with schemes[p->scheme].  The order therefore has to agree
// with the scheme constants (NOSCHEME, HTTP, FILE, MAILTO, UNKNOWN, ...)
// -- NOTE(review): those constants are declared outside this file; confirm
// the ordering there before reordering or adding entries.
Rune* schemes[NSCHEMES] = {
	L"",
	L"http",
	L"https",
	L"ftp",
	L"file",
	L"gopher",
	L"mailto",
	L"news",
	L"nntp",
	L"telnet",
	L"wais",
	L"prospero",
	L"unknown"
};

// One-time setup for this module: register Ufmt as the
// handler for the 'U' format verb.
void
urlinit(void)
{
	fmtinstall('U', Ufmt);
}

// Break the string surl into its components (scheme, user, password,
// host, port, path, query, fragment) and return a pointer to a newly
// allocated ParsedUrl built from them by urlfromparts.
// If makeabs is true and surl has no scheme and does not begin
// with //, surl is treated as if http:// had been written in front of it.
ParsedUrl*
makeurl(Rune* surl, int makeabs)
{
	int	i;
	int	total;
	int	kind = NOSCHEME;
	int	hasnet;
	Rune*	schstr = nil;
	int		nsch = 0;
	Rune*	rest = nil;
	int		nrest = 0;
	Rune*	userpass = nil;
	int		nuserpass = 0;
	Rune*	hostport = nil;
	int		nhostport = 0;
	Rune*	net = nil;
	int		nnet = 0;
	ParsedUrl	parts;

	memset(&parts, 0, sizeof(ParsedUrl));
	total = Strlen(surl);

	// Scheme detection: text before the first ':' counts as a scheme
	// only if something follows the ':' and Strnclass finds no match
	// for the class ^-a-zA-Z0-9.+ in it (presumably: no rune that is
	// illegal in a scheme name).  Otherwise the whole string is the url.
	split(surl, total, L":", &schstr, &nsch, &rest, &nrest);
	if(nrest == 0 || Strnclass(schstr, L"^-a-zA-Z0-9.+", nsch) != nil) {
		rest = surl;
		nrest = total;
		schstr = nil;
		nsch = 0;
	}
	else {
		// a well-formed name that isn't in the table stays UNKNOWN
		kind = UNKNOWN;
		i = 0;
		while(i < NSCHEMES) {
			if(Strncmpci(schstr, nsch, schemes[i]) == 0) {
				kind = i;
				nsch = Strlen(schemes[i]);
				break;
			}
			i++;
		}
	}

	// mailto: everything after the scheme is kept as the path, unparsed
	if(kind == MAILTO) {
		parts.path = rest;
		parts.npath = nrest;
		parts.scheme = kind;
		return urlfromparts(&parts);
	}

	// Is there a network-location part?  Either an explicit leading //
	// or, for a scheme-less string with makeabs set, an implied http://.
	hasnet = (nrest >= 2 && rest[0] == '/' && rest[1] == '/');
	if(hasnet) {
		rest += 2;
		nrest -= 2;
	}
	else if(makeabs && kind == NOSCHEME) {
		hasnet = 1;
		kind = HTTP;
	}

	if(hasnet) {
		// netloc runs up to the first '/'; splitl leaves that '/'
		// at the front of the path, so step over it
		splitl(rest, nrest, L"/", &net, &nnet, &parts.path, &parts.npath);
		if(parts.npath != 0) {
			parts.path++;
			parts.npath--;
		}
		parts.pstart = L"/";
		parts.npstart = 1;
		if(kind == FILE) {
			parts.host = net;
			parts.nhost = nnet;
		}
		else {
			// netloc is [user[:passwd]@]host[:port]
			split(net, nnet, L"@", &userpass, &nuserpass, &hostport, &nhostport);
			if(nhostport != 0)
				split(userpass, nuserpass, L":", &parts.user, &parts.nuser, &parts.passwd, &parts.npasswd);
			else {
				// nothing after an '@' (or no '@'): first part is host:port
				hostport = userpass;
				nhostport = nuserpass;
			}
			split(hostport, nhostport, L":", &parts.host, &parts.nhost, &parts.port, &parts.nport);
		}
	}
	else if(nrest > 0 && rest[0] == '/') {
		// absolute path without a netloc
		parts.pstart = L"/";
		parts.npstart = 1;
		parts.path = rest+1;
		parts.npath = nrest-1;
	}
	else {
		// relative path
		parts.path = rest;
		parts.npath = nrest;
	}

	if(kind != FILE) {
		// peel the fragment off first, then the query
		split(parts.path, parts.npath, L"#", &parts.path, &parts.npath, &parts.frag, &parts.nfrag);
		split(parts.path, parts.npath, L"?", &parts.path, &parts.npath, &parts.query, &parts.nquery);
	}
	else if(parts.nhost == 0) {
		// file urls with no host get localhost
		parts.host = L"localhost";
		parts.nhost = 9;
	}

	parts.scheme = kind;
	return urlfromparts(&parts);
}

// Format routine for the 'U' verb (installed by urlinit).
// Takes a ParsedUrl* from the argument list and emits its url
// string, or "<null>" when the pointer is nil.
// Returns whatever fmtrunestrcpy returns.
int
Ufmt(Fmt *f)
{
	ParsedUrl* arg;

	arg = va_arg(f->args, ParsedUrl*);
	return fmtrunestrcpy(f, (arg != nil) ? arg->url : L"<null>");
}

// Return a URL that is u made absolute relative to b.
// The original urls won't be touched.
// If u already has a scheme other than http, u itself (not a copy)
// is returned; otherwise the result is newly allocated by urlfromparts.
//
// Resolution rules, as implemented below:
//  - a u with no scheme takes b's scheme;
//  - a u with no host, path, path start, query or fragment becomes b;
//  - a u with no host takes b's user, password, host and port;
//  - if u additionally has a relative path, it is resolved against the
//    directory part of b's path (everything before b's last '/');
//  - if u additionally has no path at all, b's path is used, and b's
//    query too unless u has its own.
ParsedUrl*
makeabsoluteurl(ParsedUrl* u, ParsedUrl* b)
{
	int n;
	int scheme;
	Rune* p;
	Rune* path;
	Rune* rel;
	ParsedUrl *ans;
	ParsedUrl t;

	scheme = u->scheme;
	// Should return if SCHEME already given (according to RFC1808,
	// but various extant web pages violate that rule for non-http schemes
	if(scheme != NOSCHEME && scheme != HTTP)
			return u;

	// A scheme-less reference inherits the base scheme.  This has to
	// happen for the "u is empty" case too; otherwise the copy of b
	// made below would be stamped with NOSCHEME and lose its scheme.
	if(scheme == NOSCHEME)
		scheme = b->scheme;

	path = nil;
	memset(&t, 0, sizeof(ParsedUrl));
	if(u->nhost == 0 && u->npath == 0 && u->npstart == 0 && u->nquery == 0 && u->nfrag == 0) {
		memmove(&t, b, sizeof(ParsedUrl));
	}
	else {
		memmove(&t, u, sizeof(ParsedUrl));
		if(t.nhost == 0) {
			t.user = b->user;
			t.nuser = b->nuser;
			t.passwd = b->passwd;
			t.npasswd = b->npasswd;
			t.host = b->host;
			t.nhost = b->nhost;
			t.port = b->port;
			t.nport = b->nport;
			if(t.npstart == 0) {
				t.pstart = L"/";
				t.npstart = 1;
				if(t.npath == 0) {
					t.path = b->path;
					t.npath = b->npath;
					if(t.nquery == 0) {
						t.query = b->query;
						t.nquery = b->nquery;
					}
				}
				else {
					// n = length of b's path up to (not including) its last '/'
					n = b->npath;
					p = Strnrclass(b->path, L"/", n);
					if(p == nil)
						n = 0;
					else
						n = p-b->path;

					// canonize measures its last argument with Strlen, but
					// u->path points into u's url string and is followed
					// there by any "?query" and "#frag" text.  Hand it a
					// NUL-terminated copy of exactly npath runes so that
					// text doesn't end up inside the path (and then get
					// emitted a second time as query/fragment).
					rel = emalloc((u->npath+1)*sizeof(Rune));
					memmove(rel, u->path, u->npath*sizeof(Rune));
					rel[u->npath] = 0;
					path = canonize(b->path, n, rel);
					free(rel);

					// canonize returns nil when no path elements are left
					t.path = path;
					t.npath = (path == nil) ? 0 : Strlen(path);
				}
			}
		}
	}
	t.scheme = scheme;
	ans = urlfromparts(&t);
	// urlfromparts copied every part into ans, so the canonized path
	// is no longer needed.  (Assumes emalloc'd memory is released
	// with free -- confirm against emalloc's definition.)
	free(path);
	return ans;
}

// Return a new ParsedUrl (built by urlfromparts) that is the same
// as url except that its query part is q.
ParsedUrl*
makequeryurl(ParsedUrl* url, Rune* q)
{
	int nq;
	ParsedUrl withq;

	nq = Strlen(q);
	memmove(&withq, url, sizeof(ParsedUrl));
	withq.nquery = nq;
	withq.query = q;
	return urlfromparts(&withq);
}

// p has correct values for all its fields, except they might
// point into different strings.  Make a new ParsedUrl with
// one contiguous string containing all the parts.
//
// The string written to ans->url has the form
//	[scheme:][//[user[:passwd]@]host[:port]][/]path[?query][#frag]
// and every part pointer in the result points at the first rune of
// its part (after any separator).  nurl is set to the string length
// plus one for the terminating NUL.
// The result is allocated with emalloc; p itself is not modified.
static ParsedUrl*
urlfromparts(ParsedUrl* p)
{
	int n;
	int schlen;
	int npasswd;
	Rune* x;
	ParsedUrl* ans;

	if(p->scheme == NOSCHEME)
		schlen = 0;
	else
		schlen = Strlen(schemes[p->scheme]);

	// A password is only written after a non-empty user (see below),
	// so when there is a host but no user it must not be counted
	// either; otherwise n overshoots and the assert at the end fires
	// (e.g. for a netloc like ":pw@host").
	npasswd = p->npasswd;
	if(p->nhost != 0 && p->nuser == 0)
		npasswd = 0;

	// n = number of runes in the assembled string, separators
	// included, terminating NUL excluded
	n = schlen + (schlen != 0);
	if(p->nhost != 0)
		n += 2 + p->nuser + npasswd + p->nhost + p->nport +
			(npasswd != 0) + (p->nuser != 0) + (p->nport != 0);
	n += p->npstart + p->npath + p->nquery + p->nfrag +
		(p->nquery != 0) + (p->nfrag != 0);

	// NOTE(review): n+1 runes are written below; this size relies on
	// ParsedUrl's url member already providing room for one Rune --
	// confirm against the struct declaration.
	ans = (ParsedUrl*)emalloc(sizeof(ParsedUrl)+n*sizeof(Rune));
	x = ans->url;
	ans->scheme = p->scheme;
	ans->nuser = p->nuser;
	ans->npasswd = npasswd;
	ans->nhost = p->nhost;
	ans->nport = p->nport;
	ans->npstart = p->npstart;
	ans->npath = p->npath;
	ans->nquery = p->nquery;
	ans->nfrag = p->nfrag;
	if(schlen != 0) {
		x = Stradd(x, schemes[p->scheme], schlen);
		*x++ = ':';
	}
	if(p->nhost != 0) {
		x = Stradd(x, L"//", 2);
		ans->user = x;
		ans->passwd = x;
		if(p->nuser != 0) {
			x = Stradd(x, p->user, p->nuser);
			if(npasswd != 0) {
				*x++ = ':';
				ans->passwd = x;
				x = Stradd(x, p->passwd, npasswd);
			}
			*x++ = '@';
		}
		ans->host = x;
		ans->port = x;
		x = Stradd(x, p->host, p->nhost);
		if(p->nport != 0) {
			*x++ = ':';
			ans->port = x;
			x = Stradd(x, p->port, p->nport);
		}
	}
	else {
		// no netloc: the (empty) parts all point at the same spot
		ans->user = x;
		ans->passwd = x;
		ans->host = x;
		ans->port = x;
	}
	ans->pstart = x;
	// npstart is 0 or 1, so this adds either nothing or the "/"
	x = Stradd(x, L"/", p->npstart);
	ans->path = x;
	x = Stradd(x, p->path, p->npath);
	ans->query = x;
	if(p->nquery != 0) {
		*x++ = '?';
		ans->query = x;
		x = Stradd(x, p->query, p->nquery);
	}
	ans->frag = x;
	if(p->nfrag != 0) {
		*x++ = '#';
		// point past the '#', as is done for '?' above; leaving frag
		// on the '#' made every later copy of this url emit "##..."
		// and lose the fragment's last rune
		ans->frag = x;
		x = Stradd(x, p->frag, p->nfrag);
	}
	*x++ = 0;
	assert(x == ans->url+n+1);
	ans->nurl = n+1;
	return ans;
}

// Return 1 if a and b name the same document, 0 otherwise.
// Scheme, user, password, host, port, path start, path and query
// must all agree; the fragment is deliberately left out, since it
// selects a place within a document rather than a document.
int
urlequal(ParsedUrl* a, ParsedUrl* b)
{
	if(a->npath != b->npath || !Streqn(a->path, a->npath, b->path))
		return 0;
	if(a->scheme != b->scheme)
		return 0;
	if(a->nhost != b->nhost || !Streqn(a->host, a->nhost, b->host))
		return 0;
	if(a->nport != b->nport || !Streqn(a->port, a->nport, b->port))
		return 0;
	if(a->nuser != b->nuser || !Streqn(a->user, a->nuser, b->user))
		return 0;
	if(a->npasswd != b->npasswd || !Streqn(a->passwd, a->npasswd, b->passwd))
		return 0;
	if(a->npstart != b->npstart || !Streqn(a->pstart, a->npstart, b->pstart))
		return 0;
	if(a->nquery != b->nquery || !Streqn(a->query, a->nquery, b->query))
		return 0;
	return 1;
}

// Split s[0:n] with splitl using class cl, then drop the first rune
// of the second part (the one-rune separator that matched), if the
// second part is non-empty.
// If no split, all s goes in first component.
static void
split(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
{
	splitl(s, n, cl, p1, n1, p2, n2);
	if(*n2 == 0)
		return;
	*p2 += 1;
	*n2 -= 1;
}

// Make a new string that is canonization of the path s1[0:n1]/s2.
// (Canonization removes ./ and ../ from string).
//
// s1[0:n1] and the NUL-terminated s2 are each split at '/' into
// elements.  A "." element is dropped; a ".." element is dropped
// together with the element before it (a leading ".." is just
// dropped).  The remaining elements are joined with '/'.
// Returns a newly emalloc'd, NUL-terminated string that the caller
// owns, or nil if no elements remain.
static Rune*
canonize(Rune* s1,  int n1, Rune* s2)
{
	int	k, m, n, shiftby, shiftstart;
	Rune*	ans;
	Rune*	r;
	Rune*	elem[SMALLBUFSIZE];
	int		elen[SMALLBUFSIZE];

	// elements of s1 first, then those of s2 right behind them
	k = splitall(s1, n1, L"/", elem, elen, SMALLBUFSIZE-1);
	m = splitall(s2, Strlen(s2), L"/", elem+k, elen+k, SMALLBUFSIZE-k);
	n = m+k;
	if(n == SMALLBUFSIZE)
		trace("warning: url too long; truncated\n");

	for(k = 0; k < n; ) {
		m = elen[k];
		r = elem[k];
		shiftby = 0;
		shiftstart = 0;
		// Test the element length before looking at its runes: for
		// an empty element r[0] and r[1] lie outside the element,
		// and whatever follows it must not be mistaken for "..".
		if(m == 1 && r[0] == '.') {
			// ".": remove just this element
			shiftstart = k;
			shiftby = 1;
		}
		else if(m == 2 && r[0] == '.' && r[1] == '.') {
			if(k == 0) {
				// leading "..": nothing before it to cancel
				shiftstart = 0;
				shiftby = 1;
			}
			else {
				// "..": remove it and the element before it
				shiftstart = k-1;
				shiftby = 2;
			}
		}
		if(shiftby != 0) {
			for(m = shiftstart+shiftby; m < n; m++) {
				elem[m-shiftby] = elem[m];
				elen[m-shiftby] = elen[m];
			}
			n -= shiftby;
			// re-examine from the first slot that changed
			k = shiftstart;
			continue;
		}
		k++;
	}
	if(n == 0)
		return nil;

	// total length: all the elements plus n-1 separating slashes
	m = 0;
	for(k = 0; k < n; k++)
		m += elen[k];
	m += n-1;

	ans = emalloc((m+1)*sizeof(Rune));
	r = ans;
	for(k = 0; k < n; k++) {
		r = Stradd(r, elem[k], elen[k]);
		if(k < n-1)
			*r++ = '/';
	}
	*r = 0;
	return ans;
}

// Return a new ParsedUrl, built by urlfromparts, holding the
// same parts as url.
ParsedUrl*
copyurl(ParsedUrl* url)
{
	ParsedUrl* copy;

	copy = urlfromparts(url);
	return copy;
}

// For debugging.
// Return 1 if the part (part, npart) is acceptable for u: either it
// is empty, or it has positive length, a non-nil pointer, and lies
// entirely within u->url[0:n].  Return 0 otherwise.
static int
validurlpart(ParsedUrl* u, int n, int npart, Rune* part)
{
	Rune* first;
	Rune* limit;

	if(npart == 0)
		return 1;
	if(npart < 0 || part == nil)
		return 0;
	first = &u->url[0];
	limit = &u->url[n];
	return first <= part && part+npart <= limit;
}

// For debugging.
// Return 1 if u is internally consistent, 0 otherwise:
//  - u is non-nil and its scheme is in [NOSCHEME, NSCHEMES);
//  - nurl equals the length implied by the part lengths plus one
//    for the terminating NUL (which is how urlfromparts sets it),
//    and the url string has its NUL at that length;
//  - every non-empty part lies inside the url string;
//  - the path start is either empty or a single '/'.
int
validurl(ParsedUrl* u)
{
	int n;

	if(u == nil || u->scheme < NOSCHEME || u->scheme >= NSCHEMES)
		return 0;

	// n = expected string length, separators included, NUL excluded
	// (same computation as in urlfromparts)
	n = (u->scheme == NOSCHEME)? 0 : Strlen(schemes[u->scheme])+1;
	if(u->nhost != 0)
		n += 2 + u->nuser + u->npasswd + u->nhost + u->nport +
			(u->npasswd != 0) + (u->nuser != 0) + (u->nport != 0);
	n += u->npstart + u->npath + u->nquery + u->nfrag +
		(u->nquery != 0) + (u->nfrag != 0);

	// urlfromparts stores nurl = n+1 (it counts the NUL); comparing
	// against n here rejected every url this file produces.
	return u->nurl == n+1 &&
		u->url[n] == 0 &&
		validurlpart(u, n, u->nuser, u->user) &&
		validurlpart(u, n, u->npasswd, u->passwd) &&
		validurlpart(u, n, u->nhost, u->host) &&
		validurlpart(u, n, u->nport, u->port) &&
		(u->npstart == 0 || (u->npstart == 1 && u->pstart[0] == L'/')) &&
		validurlpart(u, n, u->npath, u->path) &&
		validurlpart(u, n, u->nquery, u->query) &&
		validurlpart(u, n, u->nfrag, u->frag);
}

/*
 * Bell Labs OSI certified Powered by Plan 9
 *
 * (Return to Plan 9 Home Page)
 *
 * Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
 * Comments to [email protected].
 */