Plan 9 from Bell Labs’s /usr/web/sources/contrib/steve/root/sys/src/cmd/mothra/crackurl.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#include <u.h>
#include <libc.h>
#include <draw.h>
#include <event.h>
#include <panel.h>
#include <ctype.h>
#include "mothra.h"
/*
 * Flag bits used in the third column of scheme[] below.
 */
#define	IP	1	/* url can contain //ipaddress[:port] */
#define	REL	2	/* fill in ip address & root of name from current, if necessary */
/*
 * Table of the url schemes crackurl recognizes.  Each row lists, in
 * order: the scheme prefix including its colon, the access type, the
 * IP/REL flag bits, and the default port.  crackurl reads these back
 * as the name, type, flags and port members of Scheme (the declaration
 * of Scheme is not in this file).
 *
 * The final row has a zero name; it terminates crackurl's search of
 * the table.  When the search reaches it, crackurl falls back to
 * scheme[0], so the order of the rows matters: the first row is the
 * default scheme.
 */
Scheme scheme[]={
	"http:",	HTTP,	IP|REL,	80,
	"https:",	HTTP,	IP|REL,	80,	/* is this right?  NOTE(review): the conventional default port for https is 443, not 80 -- confirm against the HTTP fetch code */
	"ftp:",		FTP,	IP|REL,	21,
	"file:",	FILE,	REL,	0,
	"telnet:",	TELNET,	IP,	0,
	"mailto:",	MAILTO,	0,	0,
	"gopher:",	GOPHER,	IP,	70,
	/* exec: is only compiled in when securityHole is defined */
#ifdef securityHole
	"exec:",	EXEC,	0,	0,
#endif
	0,		HTTP,	IP|REL,	80,	/* end-of-table marker (zero name) */
};
/*
 * Return 1 if c is one of '/', ':', '?', '#' or NUL, and 0 otherwise.
 * crackurl uses this to find where a host name or a port number stops.
 */
int endaddr(int c){
	switch(c){
	case '/':
	case ':':
	case '?':
	case '#':
	case '\0':
		return 1;
	default:
		return 0;
	}
}
/*
 * Canonicalize the path part of a url, in place.
 *
 * The name is treated as a list of components separated by '/'.
 * Splitting stops at the first '?' (or at the end of the string), so
 * a query string and everything after it stays attached to the last
 * component and is never rewritten.
 *
 *  - Empty components (from "//") and "." components are removed.
 *  - A ".." component removes itself and the component before it,
 *    unless there is no such component or it is itself "..";
 *    in that case the ".." is kept.
 *  - An empty last component (a trailing '/') is significant in a
 *    url and is preserved.  For the same reason a final "." or a
 *    final ".." that cancels a predecessor leaves a trailing '/'
 *    behind: "/a/b/.." becomes "/a/", not "/a", because the result
 *    still names a directory.
 *
 * A leading '/' is kept.  The result is never longer than the
 * original, so it is written back over name; nothing is allocated.
 */
void urlcanon(char *name){
	char *base, *r, *w, *e, *prev;
	int last, dot, dotdot, drop;

	/*
	 * A rooted name keeps its leading slash untouched; everything
	 * after it is processed like an unrooted name.
	 */
	base=name;
	if(*base=='/')
		base++;
	/*
	 * r reads components from the original text, w writes the
	 * canonical text.  w never passes r, so the forward copies
	 * below are safe even though source and destination overlap.
	 * Every component already written at [base, w) is nonempty,
	 * so w>base means "at least one component has been kept".
	 */
	r=w=base;
	for(;;){
		for(e=r;*e!='/' && *e!='?' && *e!='\0';e++)
			;
		last=(*e!='/');
		if(last)
			while(*e!='\0')	/* query text belongs to the last component */
				e++;
		dot=(e-r==1 && r[0]=='.');
		dotdot=(e-r==2 && r[0]=='.' && r[1]=='.');
		drop=0;
		if(dot)
			drop=1;
		else if(dotdot && w>base){
			/* find the start of the component most recently kept */
			for(prev=w;prev>base && prev[-1]!='/';prev--)
				;
			if(!(w-prev==2 && prev[0]=='.' && prev[1]=='.')){
				/* cancel it, along with the separator before it */
				w=prev;
				if(w>base)
					w--;
				drop=1;
			}
		}
		else if(e==r)
			drop=1;
		/*
		 * A dropped component in last position still leaves an
		 * empty last component, i.e. a trailing slash.
		 */
		if(!drop || last){
			if(w>base)
				*w++='/';
			if(!drop)
				while(r<e)
					*w++=*r++;
		}
		if(last)
			break;
		r=e+1;
	}
	*w='\0';
}
/*
 * Report whether s begins with a url scheme: a letter, then any
 * number of letters, digits, '+', '-' or '.', then a colon.
 * A colon that appears later (in a port number, a path or a query
 * string) does not make the name absolute.
 */
static int hasscheme(char *s){
	if(!isalpha((unsigned char)*s))
		return 0;
	for(s++;isalnum((unsigned char)*s) || *s=='+' || *s=='-' || *s=='.';s++)
		;
	return *s==':';
}
/*
 * True url parsing is a nightmare.
 * This assumes that there are two basic syntaxes
 * for url's -- with and without an ip address.
 * If the type identifier or the ip address and port number
 * or the relative address is missing from urlname or is empty,
 * it is copied from cur.
 *
 * url receives the result: scheme, access, ipaddr, port, reltext,
 * tag and fullname are filled in and map is cleared.
 * urlname is the text to parse.  It is modified: it is cut at its
 * first newline.  (A '#' is overwritten while parsing but put back
 * before returning.)
 * cur is the url that relative names are resolved against.  If its
 * scheme is unset and urlname has no scheme, cur->scheme is set to
 * the first entry of scheme[].
 *
 * NOTE(review): every copy into url's string fields is unbounded
 * (strcpy/strncpy/strcat).  The field sizes are declared outside this
 * file, so they cannot be checked here; callers presumably keep
 * urlname short enough -- confirm.
 */
void crackurl(Url *url, char *urlname, Url *cur){
	char *relp, *tagp, *httpname;
	int len;
	Scheme *up;
	char buf[30];
	/*
	 * The following lines `fix' the most egregious urlname syntax errors:
	 * skip leading white space and drop everything from the first newline on.
	 */
	while(*urlname==' ' || *urlname=='\t' || *urlname=='\n') urlname++;
	relp=strchr(urlname, '\n');
	if(relp) *relp='\0';
	/*
	 * In emulation of Netscape, attach a free "http://"
	 * to names beginning with "www.".
	 */
	if(strncmp(urlname, "www.", 4)==0){
		httpname=emalloc(strlen(urlname)+8);	/* 7 for "http://", 1 for the NUL */
		strcpy(httpname, "http://");
		strcat(httpname, urlname);
		crackurl(url, httpname, cur);
		free(httpname);
		return;
	}
	/*
	 * Start from the current url's address and path; the pieces that
	 * urlname supplies overwrite them below.
	 */
	url->port=cur->port;
	strcpy(url->ipaddr, cur->ipaddr);
	strcpy(url->reltext, cur->reltext);
	/*
	 * Pick the scheme.  Only a syntactically valid scheme prefix counts;
	 * a bare ':' elsewhere (e.g. "//host:8080/x" or "page?t=1:30") must
	 * not stop the name from inheriting the current scheme.
	 */
	if(!hasscheme(urlname)){
		up=cur->scheme;
		if(up==0){
			up=&scheme[0];
			cur->scheme=up;
		}
	}
	else{
		for(up=scheme;up->name;up++){
			len=strlen(up->name);
			if(strncmp(urlname, up->name, len)==0){
				urlname+=len;
				break;
			}
		}
		/* unknown scheme: nothing is consumed from urlname */
		if(up->name==0) up=&scheme[0];	/* default to http: */
	}
	url->access=up->type;
	url->scheme=up;
	/* a path inherited from a different scheme is meaningless */
	if(up!=cur->scheme)
		url->reltext[0]='\0';
	/*
	 * //ipaddress[:port]
	 */
	if((up->flags&IP) && strncmp(urlname, "//", 2)==0){
		urlname+=2;
		for(relp=urlname;!endaddr(*relp);relp++);
		len=relp-urlname;
		strncpy(url->ipaddr, urlname, len);
		url->ipaddr[len]='\0';
		urlname=relp;
		if(*urlname==':'){
			urlname++;
			url->port=atoi(urlname);
			while(!endaddr(*urlname)) urlname++;
		}
		else
			url->port=up->port;
		/* an address with nothing after it names the root */
		if(*urlname=='\0') urlname="/";
	}
	/*
	 * Split off the #tag.  The '#' is replaced by a NUL so that the
	 * copies below stop there; it is restored after urlcanon.
	 */
	tagp=strchr(urlname, '#');
	if(tagp){
		*tagp='\0';
		strcpy(url->tag, tagp+1);
	}
	else
		url->tag[0]='\0';
	/*
	 * An absolute name, or any name in a scheme without REL, replaces
	 * the path outright.  A nonempty relative name replaces whatever
	 * follows the last '/' of the inherited path.  An empty name leaves
	 * the inherited path alone.
	 */
	if(!(up->flags&REL) || *urlname=='/')
		strcpy(url->reltext, urlname);
	else if(urlname[0]){
		relp=strrchr(url->reltext, '/');
		if(relp==0)
			strcpy(url->reltext, urlname);
		else
			strcpy(relp+1, urlname);
	}
	urlcanon(url->reltext);
	if(tagp) *tagp='#';
	/*
	 * The following mess of strcpys and strcats
	 * can't be changed to a few sprints because
	 * urls are not necessarily composed of legal utf
	 */
	strcpy(url->fullname, up->name);
	if(up->flags&IP){
		strcat(url->fullname, "//");
		strcat(url->fullname, url->ipaddr);
		/* the port is spelled out only when it is not the scheme's default */
		if(url->port!=up->port){
			sprint(buf, ":%d", url->port);
			strcat(url->fullname, buf);
		}
	}
	strcat(url->fullname, url->reltext);
	url->map=0;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].