/*
* domain name resolvers, see rfcs 1035 and 1123
*/
#include <u.h>
#include <libc.h>
#include <ip.h>
#include <bio.h>
#include <ndb.h>
#include "dns.h"
typedef struct Dest Dest;
typedef struct Ipaddr Ipaddr;
typedef struct Query Query;
enum
{
Udp, Tcp,
Answerr= -1,
Answnone,
Maxdest= 24, /* maximum destinations for a request message */
Maxoutstanding= 15, /* max. outstanding queries per domain name */
Remntretry= 15, /* min. sec.s between /net.alt remount tries */
/*
* these are the old values; we're trying longer timeouts now
* primarily for the benefit of remote nameservers querying us
* during times of bad connectivity.
*/
// Maxtrans= 3, /* maximum transmissions to a server */
// Maxretries= 3, /* cname+actual resends: was 32; have pity on user */
// Maxwaitms= 1000, /* wait no longer for a remote dns query */
// Minwaitms= 100, /* willing to wait for a remote dns query */
Maxtrans= 5, /* maximum transmissions to a server */
Maxretries= 5, /* cname+actual resends: was 32; have pity on user */
Maxwaitms= 4000, /* wait no longer for a remote dns query */
Minwaitms= 750, /* willing to wait for a remote dns query */
Destmagic= 0xcafebabe,
Querymagic= 0xdeadbeef,
};
enum { Hurry, Patient, };
enum { Outns, Inns, };
struct Ipaddr {
Ipaddr *next;
uchar ip[IPaddrlen];
};
struct Dest
{
uchar a[IPaddrlen]; /* ip address */
DN *s; /* name server */
int nx; /* number of transmissions */
int code; /* response code; used to clear dp->respcode */
ulong magic;
};
/*
* Query has a QLock in it, thus it can't be an automatic
* variable, since each process would see a separate copy
* of the lock on its stack.
*/
struct Query {
DN *dp; /* domain */
ushort type; /* and type to look up */
Request *req;
RR *nsrp; /* name servers to consult */
/* dest must not be on the stack due to forking in slave() */
Dest *dest; /* array of destinations */
Dest *curdest; /* pointer to next to fill */
int ndest; /* transmit to this many on this round */
int udpfd;
QLock tcplock; /* only one tcp call at a time per query */
int tcpset;
int tcpfd; /* if Tcp, read replies from here */
int tcpctlfd;
uchar tcpip[IPaddrlen];
ulong magic;
};
/* estimated % probability of such a record existing at all */
int likely[] = {
[Ta] 70,
[Taaaa] 45,
[Tcname] 15,
[Tmx] 60,
[Tns] 90,
[Tnull] 5,
[Tptr] 35,
[Tsoa] 90,
[Tsrv] 60,
[Ttxt] 15,
[Tcaa] 45,
[Tall] 95,
};
static RR* dnresolve1(char*, int, int, Request*, int, int);
static int netquery(Query *, int);
/*
* reading /proc/pid/args yields either "name args" or "name [display args]",
* so return only display args, if any.
*/
static char *
procgetname(void)
{
int fd, n;
char *lp, *rp;
char buf[256];
snprint(buf, sizeof buf, "#p/%d/args", getpid());
if((fd = open(buf, OREAD)) < 0)
return strdup("");
*buf = '\0';
n = read(fd, buf, sizeof buf-1);
close(fd);
if (n >= 0)
buf[n] = '\0';
if ((lp = strchr(buf, '[')) == nil ||
(rp = strrchr(buf, ']')) == nil)
return strdup("");
*rp = '\0';
return strdup(lp+1);
}
void
rrfreelistptr(RR **rpp)
{
RR *rp;
if (rpp == nil || *rpp == nil)
return;
rp = *rpp;
*rpp = nil; /* update pointer in memory before freeing list */
rrfreelist(rp);
}
/*
* lookup 'type' info for domain name 'name'. If it doesn't exist, try
* looking it up as a canonical name.
*
* this process can be quite slow if time-outs are set too high when querying
* nameservers that just don't respond to certain query types. in that case,
* there will be multiple udp retries, multiple nameservers will be queried,
* and this will be repeated for a cname query. the whole thing will be
* retried several times until we get an answer or a time-out.
*/
RR*
dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
int recurse, int rooted, int *status)
{
RR *rp, *nrp, *drp;
DN *dp;
int loops;
char *procname;
char nname[Domlen];
if(status)
*status = 0;
if(depth > 12) /* in a recursive loop? */
return nil;
procname = procgetname();
/*
* hack for systems that don't have resolve search
* lists. Just look up the simple name in the database.
*/
if(!rooted && strchr(name, '.') == nil){
rp = nil;
drp = domainlist(class);
for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
snprint(nname, sizeof nname, "%s.%s", name,
nrp->ptr->name);
rp = dnresolve(nname, class, type, req, cn, depth+1,
recurse, rooted, status);
lock(&dnlock);
rrfreelist(rrremneg(&rp));
unlock(&dnlock);
}
if(drp != nil)
rrfreelist(drp);
procsetname(procname);
free(procname);
return rp;
}
/*
* try the name directly
*/
rp = dnresolve1(name, class, type, req, depth, recurse);
if(rp == nil) {
/*
* try it as a canonical name if we weren't told
* that the name didn't exist
*/
dp = dnlookup(name, class, 0);
if(type != Tptr && dp->respcode != Rname)
for(loops = 0; rp == nil && loops < Maxretries; loops++){
/* retry cname, then the actual type */
rp = dnresolve1(name, class, Tcname, req,
depth, recurse);
if(rp == nil)
break;
/* rp->host == nil shouldn't happen, but does */
if(rp->negative || rp->host == nil){
rrfreelist(rp);
rp = nil;
break;
}
name = rp->host->name;
lock(&dnlock);
if(cn)
rrcat(cn, rp);
else
rrfreelist(rp);
unlock(&dnlock);
rp = dnresolve1(name, class, type, req,
depth, recurse);
}
/* distinction between not found and not good */
if(rp == nil && status != nil && dp->respcode != Rok)
*status = dp->respcode;
}
procsetname(procname);
free(procname);
return randomize(rp);
}
static void
queryinit(Query *qp, DN *dp, int type, Request *req)
{
memset(qp, 0, sizeof *qp);
qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
qp->dp = dp;
qp->type = type;
if (qp->type != type)
dnslog("queryinit: bogus type %d", type);
qp->req = req;
qp->nsrp = nil;
qp->dest = qp->curdest = nil;
qp->magic = Querymagic;
}
static void
queryck(Query *qp)
{
assert(qp);
assert(qp->magic == Querymagic);
}
static void
querydestroy(Query *qp)
{
queryck(qp);
/* leave udpfd open */
if (qp->tcpfd > 0)
close(qp->tcpfd);
if (qp->tcpctlfd > 0) {
hangup(qp->tcpctlfd);
close(qp->tcpctlfd);
}
free(qp->dest);
memset(qp, 0, sizeof *qp); /* prevent accidents */
qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
}
static void
destinit(Dest *p)
{
memset(p, 0, sizeof *p);
p->magic = Destmagic;
}
static void
destck(Dest *p)
{
assert(p);
assert(p->magic == Destmagic);
}
/*
* if the response to a query hasn't arrived within 100 ms.,
* it's unlikely to arrive at all. after 1 s., it's really unlikely.
* queries for missing RRs are likely to produce time-outs rather than
* negative responses, so cname and aaaa queries are likely to time out,
* thus we don't wait very long for them.
*/
static void
notestats(vlong start, int tmout, int type)
{
qlock(&stats);
if (tmout) {
stats.tmout++;
if (type == Taaaa)
stats.tmoutv6++;
else if (type == Tcname)
stats.tmoutcname++;
} else {
long wait10ths = NS2MS(nsec() - start) / 100;
if (wait10ths <= 0)
stats.under10ths[0]++;
else if (wait10ths >= nelem(stats.under10ths))
stats.under10ths[nelem(stats.under10ths) - 1]++;
else
stats.under10ths[wait10ths]++;
}
qunlock(&stats);
}
static void
noteinmem(void)
{
qlock(&stats);
stats.answinmem++;
qunlock(&stats);
}
/* netquery with given name servers, free ns rrs when done */
static int
netqueryns(Query *qp, int depth, RR *nsrp)
{
int rv;
qp->nsrp = nsrp;
rv = netquery(qp, depth);
lock(&dnlock);
rrfreelist(nsrp);
unlock(&dnlock);
return rv;
}
static RR*
issuequery(Query *qp, char *name, int class, int depth, int recurse)
{
char *cp;
DN *nsdp;
RR *rp, *nsrp, *dbnsrp;
/*
* if we're running as just a resolver, query our
* designated name servers
*/
if(cfg.resolver){
nsrp = randomize(getdnsservers(class));
if(nsrp != nil)
if(netqueryns(qp, depth+1, nsrp) > Answnone)
return rrlookup(qp->dp, qp->type, OKneg);
}
/*
* walk up the domain name looking for
* a name server for the domain.
*/
for(cp = name; cp; cp = walkup(cp)){
/*
* if this is a local (served by us) domain,
* return answer
*/
dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
if(dbnsrp && dbnsrp->local){
rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
lock(&dnlock);
rrfreelist(dbnsrp);
unlock(&dnlock);
return rp;
}
/*
* if recursion isn't set, just accept local
* entries
*/
if(recurse == Dontrecurse){
if(dbnsrp) {
lock(&dnlock);
rrfreelist(dbnsrp);
unlock(&dnlock);
}
continue;
}
/* look for ns in cache */
nsdp = dnlookup(cp, class, 0);
nsrp = nil;
if(nsdp)
nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
/* if the entry timed out, ignore it */
if(nsrp && nsrp->ttl < now){
lock(&dnlock);
rrfreelistptr(&nsrp);
unlock(&dnlock);
}
if(nsrp){
lock(&dnlock);
rrfreelistptr(&dbnsrp);
unlock(&dnlock);
/* query the name servers found in cache */
if(netqueryns(qp, depth+1, nsrp) > Answnone)
return rrlookup(qp->dp, qp->type, OKneg);
} else if(dbnsrp)
/* try the name servers found in db */
if(netqueryns(qp, depth+1, dbnsrp) > Answnone)
return rrlookup(qp->dp, qp->type, NOneg);
}
return nil;
}
static RR*
dnresolve1(char *name, int class, int type, Request *req, int depth,
int recurse)
{
Area *area;
DN *dp;
RR *rp;
Query *qp;
if(debug)
dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
/* only class Cin implemented so far */
if(class != Cin)
return nil;
dp = dnlookup(name, class, 1);
/*
* Try the cache first
*/
rp = rrlookup(dp, type, OKneg);
if(rp)
if(rp->db){
/* unauthoritative db entries are hints */
if(rp->auth) {
noteinmem();
if(debug)
dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
getpid(), name, type, class);
return rp;
}
} else
/* cached entry must still be valid */
if(rp->ttl > now)
/* but Tall entries are special */
if(type != Tall || rp->query == Tall) {
noteinmem();
if(debug)
dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
getpid(), name, type, class);
return rp;
}
lock(&dnlock);
rrfreelist(rp);
unlock(&dnlock);
rp = nil; /* accident prevention */
USED(rp);
/*
* try the cache for a canonical name. if found punt
* since we'll find it during the canonical name search
* in dnresolve().
*/
if(type != Tcname){
rp = rrlookup(dp, Tcname, NOneg);
lock(&dnlock);
rrfreelist(rp);
unlock(&dnlock);
if(rp){
if(debug)
dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
getpid(), name, type, class);
return nil;
}
}
/*
* if the domain name is within an area of ours,
* we should have found its data in memory by now.
*/
area = inmyarea(dp->name);
if (area || strncmp(dp->name, "local#", 6) == 0) {
// char buf[32];
// dnslog("%s %s: no data in area %s", dp->name,
// rrname(type, buf, sizeof buf), area->soarr->owner->name);
return nil;
}
qp = emalloc(sizeof *qp);
queryinit(qp, dp, type, req);
rp = issuequery(qp, name, class, depth, recurse);
querydestroy(qp);
free(qp);
if(rp){
if(debug)
dnslog("[%d] dnresolve1 %s %d %d: rr from query",
getpid(), name, type, class);
return rp;
}
/* settle for a non-authoritative answer */
rp = rrlookup(dp, type, OKneg);
if(rp){
if(debug)
dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
getpid(), name, type, class);
return rp;
}
/* noone answered. try the database, we might have a chance. */
rp = dblookup(name, class, type, 0, 0);
if (rp) {
if(debug)
dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
getpid(), name, type, class);
}else{
if(debug)
dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
getpid(), name, type, class);
}
return rp;
}
/*
* walk a domain name one element to the right.
* return a pointer to that element.
* in other words, return a pointer to the parent domain name.
*/
char*
walkup(char *name)
{
char *cp;
cp = strchr(name, '.');
if(cp)
return cp+1;
else if(*name)
return "";
else
return 0;
}
/*
* Get a udp port for sending requests and reading replies. Put the port
* into "headers" mode.
*/
static char *hmsg = "headers";
int
udpport(char *mtpt)
{
int fd, ctl;
char ds[64], adir[64];
/* get a udp port */
snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
ctl = announce(ds, adir);
if(ctl < 0){
/* warning("can't get udp port"); */
return -1;
}
/* turn on header style interface */
if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
close(ctl);
warning(hmsg);
return -1;
}
/* grab the data file */
snprint(ds, sizeof ds, "%s/data", adir);
fd = open(ds, ORDWR);
close(ctl);
if(fd < 0)
warning("can't open udp port %s: %r", ds);
return fd;
}
void
initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
{
mp->flags = flags;
mp->id = reqno;
mp->qd = rp;
if(rp != nil)
mp->qdcount = 1;
}
DNSmsg *
newdnsmsg(RR *rp, int flags, ushort reqno)
{
DNSmsg *mp;
mp = emalloc(sizeof *mp);
initdnsmsg(mp, rp, flags, reqno);
return mp;
}
/* generate a DNS UDP query packet */
int
mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
{
DNSmsg m;
int len;
Udphdr *uh = (Udphdr*)buf;
RR *rp;
/* stuff port number into output buffer */
memset(uh, 0, sizeof *uh);
hnputs(uh->rport, Dnsport);
/* make request and convert it to output format */
memset(&m, 0, sizeof m);
rp = rralloc(type);
rp->owner = dp;
initdnsmsg(&m, rp, flags, reqno);
/*
* we're conservative about payload size; if we know that the remote
* can handle more (e.g., it does edns0), we could raise it.
*/
len = convDNS2M(&m, &buf[Udphdrsize], Maxdnspayload);
rrfreelistptr(&m.qd);
memset(&m, 0, sizeof m); /* cause trouble */
return len;
}
void
freeanswers(DNSmsg *mp)
{
lock(&dnlock);
rrfreelistptr(&mp->qd);
rrfreelistptr(&mp->an);
rrfreelistptr(&mp->ns);
rrfreelistptr(&mp->ar);
unlock(&dnlock);
mp->qdcount = mp->ancount = mp->nscount = mp->arcount = 0;
}
/* timed read of reply. sets srcip. ibuf must be 64K to handle tcp answers. */
static int
readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
uchar *srcip)
{
int len, fd;
long ms;
vlong startns = nsec();
uchar *reply;
uchar lenbuf[2];
len = -1; /* pessimism */
ms = endms - NS2MS(startns);
if (ms < Minreqtm)
ms = Minreqtm; /* give it a fighting chance */
reply = ibuf;
memset(srcip, 0, IPaddrlen);
alarm(ms);
if (medium == Udp)
if (qp->udpfd <= 0)
dnslog("readnet: qp->udpfd closed");
else {
len = read(qp->udpfd, ibuf, Udphdrsize+Maxpayload);
alarm(0);
notestats(startns, len < 0, qp->type);
if (len >= IPaddrlen)
memmove(srcip, ibuf, IPaddrlen);
if (len >= Udphdrsize) {
len -= Udphdrsize;
reply += Udphdrsize;
}
}
else {
if (!qp->tcpset)
dnslog("readnet: tcp params not set");
fd = qp->tcpfd;
if (fd <= 0)
dnslog("readnet: %s: tcp fd unset for dest %I",
qp->dp->name, qp->tcpip);
else if (readn(fd, lenbuf, 2) != 2) {
dnslog("readnet: short read of 2-byte tcp msg size "
"from %I after %ld ms", qp->tcpip, ms);
/* probably a time-out */
notestats(startns, 1, qp->type);
} else {
len = lenbuf[0]<<8 | lenbuf[1];
if (readn(fd, ibuf, len) != len) {
dnslog("readnet: short read of tcp data "
"from %I after %ld ms",
qp->tcpip, ms);
/* probably a time-out */
notestats(startns, 1, qp->type);
len = -1;
}
}
memmove(srcip, qp->tcpip, IPaddrlen);
}
alarm(0);
*replyp = reply;
return len;
}
/*
* read replies to a request and remember the rrs in the answer(s).
* ignore any of the wrong type.
* wait at most until endms.
*/
static int
readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
uvlong endms)
{
int len;
char *err;
char tbuf[32];
uchar *reply;
uchar srcip[IPaddrlen];
RR *rp;
queryck(qp);
memset(mp, 0, sizeof *mp);
memset(srcip, 0, sizeof srcip);
if (0)
len = -1;
for (; timems() < endms &&
(len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
freeanswers(mp)){
/* convert into internal format */
memset(mp, 0, sizeof *mp);
err = convM2DNS(reply, len, mp, nil);
if (mp->flags & Ftrunc) {
free(err);
freeanswers(mp);
/* notify our caller to retry the query via tcp. */
return -1;
} else if(err){
dnslog("readreply: %s: input err, len %d: %s: %I",
qp->dp->name, len, err, srcip);
free(err);
continue;
}
if(debug)
logreply(qp->req->id, srcip, mp);
/* answering the right question? */
if(mp->id != req)
dnslog("%d: id %d instead of %d: %I", qp->req->id,
mp->id, req, srcip);
else if(mp->qd == 0)
dnslog("%d: no question RR: %I", qp->req->id, srcip);
else if(mp->qd->owner != qp->dp)
dnslog("%d: owner %s instead of %s: %I", qp->req->id,
mp->qd->owner->name, qp->dp->name, srcip);
else if(mp->qd->type != qp->type)
dnslog("%d: qp->type %d instead of %d: %I",
qp->req->id, mp->qd->type, qp->type, srcip);
else {
/* remember what request this is in answer to */
for(rp = mp->an; rp; rp = rp->next)
rp->query = qp->type;
return 0;
}
}
if (timems() >= endms) {
; /* query expired */
} else if (0) {
/* this happens routinely when a read times out */
dnslog("readreply: %s type %s: ns %I read error or eof "
"(returned %d): %r", qp->dp->name, rrname(qp->type,
tbuf, sizeof tbuf), srcip, len);
if (medium == Udp)
for (rp = qp->nsrp; rp != nil; rp = rp->next)
if (rp->type == Tns)
dnslog("readreply: %s: query sent to "
"ns %s", qp->dp->name,
rp->host->name);
}
return -1;
}
/*
* return non-0 if first list includes second list
*/
int
contains(RR *rp1, RR *rp2)
{
RR *trp1, *trp2;
for(trp2 = rp2; trp2; trp2 = trp2->next){
for(trp1 = rp1; trp1; trp1 = trp1->next)
if(trp1->type == trp2->type)
if(trp1->host == trp2->host)
if(trp1->owner == trp2->owner)
break;
if(trp1 == nil)
return 0;
}
return 1;
}
/*
* return multicast version if any
*/
int
ipisbm(uchar *ip)
{
if(isv4(ip)){
if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
ipcmp(ip, IPv4bcast) == 0)
return 4;
} else
if(ip[0] == 0xff)
return 6;
return 0;
}
/*
* Get next server address(es) into qp->dest[nd] and beyond
*/
static int
serveraddrs(Query *qp, int nd, int depth)
{
RR *rp, *arp, *trp;
Dest *cur;
if(nd >= Maxdest) /* dest array is full? */
return Maxdest - 1;
/*
* look for a server whose address we already know.
* if we find one, mark it so we ignore this on
* subsequent passes.
*/
arp = 0;
for(rp = qp->nsrp; rp; rp = rp->next){
assert(rp->magic == RRmagic);
if(rp->marker)
continue;
arp = rrlookup(rp->host, Ta, NOneg);
if(arp == nil)
arp = rrlookup(rp->host, Taaaa, NOneg);
if(arp){
rp->marker = 1;
break;
}
arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
if(arp == nil)
arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
if(arp){
rp->marker = 1;
break;
}
}
/*
* if the cache and database lookup didn't find any new
* server addresses, try resolving one via the network.
* Mark any we try to resolve so we don't try a second time.
*/
if(arp == 0)
for(rp = qp->nsrp; rp; rp = rp->next){
if(rp->marker)
continue;
rp->marker = 1;
/*
* avoid loops looking up a server under itself
*/
if(subsume(rp->owner->name, rp->host->name))
continue;
arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
depth+1, Recurse, 1, 0);
if(arp == nil)
arp = dnresolve(rp->host->name, Cin, Taaaa,
qp->req, 0, depth+1, Recurse, 1, 0);
lock(&dnlock);
rrfreelist(rrremneg(&arp));
unlock(&dnlock);
if(arp)
break;
}
/* use any addresses that we found */
for(trp = arp; trp && nd < Maxdest; trp = trp->next){
cur = &qp->dest[nd];
parseip(cur->a, trp->ip->name);
/*
* straddling servers can reject all nameservers if they are all
* inside, so be sure to list at least one outside ns at
* the end of the ns list in /lib/ndb for `dom='.
*/
if (ipisbm(cur->a) ||
cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
continue;
cur->nx = 0;
cur->s = trp->owner;
cur->code = Rtimeout;
nd++;
}
lock(&dnlock);
rrfreelist(arp);
unlock(&dnlock);
if(nd >= Maxdest) /* dest array is full? */
return Maxdest - 1;
return nd;
}
/*
* cache negative responses
*/
static void
cacheneg(DN *dp, int type, int rcode, RR *soarr)
{
RR *rp;
DN *soaowner;
ulong ttl;
stats.negcached++;
/* no cache time specified, don't make anything up */
if(soarr != nil){
lock(&dnlock);
if(soarr->next != nil)
rrfreelistptr(&soarr->next);
unlock(&dnlock);
soaowner = soarr->owner;
} else
soaowner = nil;
/* the attach can cause soarr to be freed so mine it now */
if(soarr != nil && soarr->soa != nil)
ttl = soarr->soa->minttl+now;
else
ttl = 5*Min;
/* add soa and negative RR to the database */
rrattach(soarr, Authoritative);
rp = rralloc(type);
rp->owner = dp;
rp->negative = 1;
rp->negsoaowner = soaowner;
rp->negrcode = rcode;
rp->ttl = ttl;
rrattach(rp, Authoritative);
}
static int
setdestoutns(Dest *p, int n)
{
uchar *outns = outsidens(n);
destck(p);
destinit(p);
if (outns == nil) {
if (n == 0)
dnslog("[%d] no outside-ns in ndb", getpid());
return -1;
}
memmove(p->a, outns, sizeof p->a);
p->s = dnlookup("outside-ns-ips", Cin, 1);
return 0;
}
/*
* issue query via UDP or TCP as appropriate.
* for TCP, returns with qp->tcpip set from udppkt header.
*/
static int
mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
{
int rv = -1, nfd;
char *domain;
char conndir[NETPATHLEN], net[NETPATHLEN];
uchar belen[2];
NetConnInfo *nci;
queryck(qp);
domain = smprint("%I", udppkt);
if (myaddr(domain)) {
dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
domain);
free(domain);
return rv;
}
switch (medium) {
case Udp:
free(domain);
nfd = dup(qp->udpfd, -1);
if (nfd < 0) {
warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
close(qp->udpfd); /* ensure it's closed */
qp->udpfd = -1; /* poison it */
return rv;
}
close(nfd);
if (qp->udpfd <= 0)
dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
else {
if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
len+Udphdrsize)
warning("sending udp msg: %r");
else {
stats.qsent++;
rv = 0;
}
}
break;
case Tcp:
/* send via TCP & keep fd around for reply */
snprint(net, sizeof net, "%s/tcp",
(mntpt[0] != '\0'? mntpt: "/net"));
alarm(10*1000);
qp->tcpfd = rv = dial(netmkaddr(domain, net, "dns"), nil,
conndir, &qp->tcpctlfd);
alarm(0);
if (qp->tcpfd < 0) {
dnslog("can't dial tcp!%s!dns: %r", domain);
free(domain);
break;
}
free(domain);
nci = getnetconninfo(conndir, qp->tcpfd);
if (nci) {
parseip(qp->tcpip, nci->rsys);
freenetconninfo(nci);
} else
dnslog("mydnsquery: getnetconninfo failed");
qp->tcpset = 1;
belen[0] = len >> 8;
belen[1] = len;
if (write(qp->tcpfd, belen, 2) != 2 ||
write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
warning("sending tcp msg: %r");
break;
default:
sysfatal("mydnsquery: bad medium");
}
return rv;
}
/*
* send query to all UDP destinations or one TCP destination,
* taken from obuf (udp packet) header
*/
static int
xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
{
int j, n;
char buf[32];
Dest *p;
queryck(qp);
if(timems() >= qp->req->aborttime)
return -1;
/*
* get a nameserver address if we need one.
* serveraddrs populates qp->dest.
*/
p = qp->dest;
destck(p);
if (qp->ndest < 0 || qp->ndest > Maxdest) {
dnslog("qp->ndest %d out of range", qp->ndest);
abort();
}
/*
* we're to transmit to more destinations than we currently have,
* so get another.
*/
if (qp->ndest > qp->curdest - p) {
j = serveraddrs(qp, qp->curdest - p, depth);
if (j < 0 || j >= Maxdest) {
dnslog("serveraddrs() result %d out of range", j);
abort();
}
qp->curdest = &qp->dest[j];
}
destck(qp->curdest);
/* no servers, punt */
if (qp->ndest == 0)
if (cfg.straddle && cfg.inside) {
/* get ips of "outside-ns-ips" */
qp->curdest = qp->dest;
for(n = 0; n < Maxdest; n++, qp->curdest++)
if (setdestoutns(qp->curdest, n) < 0)
break;
if(n == 0)
dnslog("xmitquery: %s: no outside-ns nameservers",
qp->dp->name);
} else
/* it's probably just a bogus domain, don't log it */
return -1;
/* send to first 'qp->ndest' destinations */
j = 0;
if (medium == Tcp) {
j++;
queryck(qp);
assert(qp->dp);
procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
qp->dp->name, rrname(qp->type, buf, sizeof buf));
mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
if(debug)
logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
qp->type);
} else
for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
/* skip destinations we've finished with */
if(p->nx >= Maxtrans)
continue;
j++;
/* exponential backoff of requests */
if((1<<p->nx) > qp->ndest)
continue;
if(memcmp(p->a, IPnoaddr, sizeof IPnoaddr) == 0)
continue; /* mistake */
procsetname("udp %sside query to %I/%s %s %s",
(inns? "in": "out"), p->a, p->s->name,
qp->dp->name, rrname(qp->type, buf, sizeof buf));
if(debug)
logsend(qp->req->id, depth, p->a, p->s->name,
qp->dp->name, qp->type);
/* fill in UDP destination addr & send it */
memmove(obuf, p->a, sizeof p->a);
mydnsquery(qp, medium, obuf, len);
p->nx++;
}
if(j == 0) {
return -1;
}
return 0;
}
static int lckindex[Maxlcks] = {
0, /* all others map here */
Ta,
Tns,
Tcname,
Tsoa,
Tptr,
Tmx,
Ttxt,
Taaaa,
};
static int
qtype2lck(int qtype) /* map query type to querylck index */
{
int i;
for (i = 1; i < nelem(lckindex); i++)
if (lckindex[i] == qtype)
return i;
return 0;
}
/* is mp a cachable negative response (with Rname set)? */
static int
isnegrname(DNSmsg *mp)
{
/* TODO: could add || cfg.justforw to RHS of && */
return mp->an == nil && (mp->flags & Rmask) == Rname;
}
/* returns Answerr (-1) on errors, else number of answers, which can be zero. */
static int
procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
{
int rv;
// int lcktype;
char buf[32];
DN *ndp;
Query *nqp;
RR *tp, *soarr;
if (mp->an == nil)
stats.negans++;
/* ignore any error replies */
if((mp->flags & Rmask) == Rserver){
stats.negserver++;
freeanswers(mp);
if(p != qp->curdest)
p->code = Rserver;
return Answerr;
}
/* ignore any bad delegations */
if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
stats.negbaddeleg++;
if(mp->an == nil){
stats.negbdnoans++;
freeanswers(mp);
if(p != qp->curdest)
p->code = Rserver;
dnslog(" and no answers");
return Answerr;
}
dnslog(" but has answers; ignoring ns");
lock(&dnlock);
rrfreelistptr(&mp->ns);
unlock(&dnlock);
mp->nscount = 0;
}
/* remove any soa's from the authority section */
lock(&dnlock);
soarr = rrremtype(&mp->ns, Tsoa);
/* incorporate answers */
unique(mp->an);
unique(mp->ns);
unique(mp->ar);
unlock(&dnlock);
if(mp->an)
rrattach(mp->an, (mp->flags & Fauth) != 0);
if(mp->ar)
rrattach(mp->ar, Notauthoritative);
if(mp->ns && !cfg.justforw){
ndp = mp->ns->owner;
rrattach(mp->ns, Notauthoritative);
} else {
ndp = nil;
lock(&dnlock);
rrfreelistptr(&mp->ns);
unlock(&dnlock);
mp->nscount = 0;
}
/* free the question */
if(mp->qd) {
lock(&dnlock);
rrfreelistptr(&mp->qd);
unlock(&dnlock);
mp->qdcount = 0;
}
/*
* Any reply from an authoritative server,
* or a positive reply terminates the search.
* A negative response now also terminates the search.
*/
if(mp->an != nil || (mp->flags & Fauth)){
if(isnegrname(mp))
qp->dp->respcode = Rname;
else
qp->dp->respcode = Rok;
/*
* cache any negative responses, free soarr.
* negative responses need not be authoritative:
* they can legitimately come from a cache.
*/
if( /* (mp->flags & Fauth) && */ mp->an == nil)
cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
else {
lock(&dnlock);
rrfreelist(soarr);
unlock(&dnlock);
}
return 1;
} else if (isnegrname(mp)) {
qp->dp->respcode = Rname;
/*
* cache negative response.
* negative responses need not be authoritative:
* they can legitimately come from a cache.
*/
cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
return 1;
}
stats.negnorname++;
lock(&dnlock);
rrfreelist(soarr);
unlock(&dnlock);
/*
* if we've been given better name servers, recurse.
* if we're a pure resolver, don't recurse, we have
* to forward to a fixed set of named servers.
*/
if(!mp->ns || cfg.resolver && cfg.justforw)
return Answnone;
tp = rrlookup(ndp, Tns, NOneg);
if(contains(qp->nsrp, tp)){
lock(&dnlock);
rrfreelist(tp);
unlock(&dnlock);
return Answnone;
}
procsetname("recursive query for %s %s", qp->dp->name,
rrname(qp->type, buf, sizeof buf));
/*
* we're called from udpquery, called from
* netquery, which current holds qp->dp->querylck,
* so release it now and acquire it upon return.
*/
// lcktype = qtype2lck(qp->type); /* someday try this again */
// qunlock(&qp->dp->querylck[lcktype]);
nqp = emalloc(sizeof *nqp);
queryinit(nqp, qp->dp, qp->type, qp->req);
nqp->nsrp = tp;
rv = netquery(nqp, depth+1);
// qlock(&qp->dp->querylck[lcktype]);
rrfreelist(nqp->nsrp);
querydestroy(nqp);
free(nqp);
return rv;
}
/*
* send a query via tcp to a single address (from ibuf's udp header)
* and read the answer(s) into mp->an.
*/
static int
tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
ulong waitms, int inns, ushort req)
{
int rv = 0;
uvlong endms;
endms = timems() + waitms;
if(endms > qp->req->aborttime)
endms = qp->req->aborttime;
if (0)
dnslog("%s: udp reply truncated; retrying query via tcp to %I",
qp->dp->name, qp->tcpip);
qlock(&qp->tcplock);
memmove(obuf, ibuf, IPaddrlen); /* send back to respondent */
/* sets qp->tcpip from obuf's udp header */
if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
rv = -1;
if (qp->tcpfd > 0) {
hangup(qp->tcpctlfd);
close(qp->tcpctlfd);
close(qp->tcpfd);
}
qp->tcpfd = qp->tcpctlfd = -1;
qunlock(&qp->tcplock);
return rv;
}
/*
* query name servers. fill in obuf with on-the-wire representation of a
* DNSmsg derived from qp. if the name server returns a pointer to another
* name server, recurse.
*/
static int
queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
{
int ndest, len, replywaits, rv;
ushort req;
uvlong endms;
char buf[12];
uchar srcip[IPaddrlen];
Dest *p, *np, *dest;
/* pack request into a udp message */
req = rand();
len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
/* no server addresses yet */
queryck(qp);
dest = emalloc(Maxdest * sizeof *dest); /* dest can't be on stack */
for (p = dest; p < dest + Maxdest; p++)
destinit(p);
/* this dest array is local to this call of queryns() */
free(qp->dest);
qp->curdest = qp->dest = dest;
/*
* transmit udp requests and wait for answers.
* at most Maxtrans attempts to each address.
* each cycle send one more message than the previous.
* retry a query via tcp if its response is truncated.
*/
for(ndest = 1; ndest < Maxdest; ndest++){
qp->ndest = ndest;
qp->tcpset = 0;
if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
break;
endms = timems() + waitms;
if(endms > qp->req->aborttime)
endms = qp->req->aborttime;
for(replywaits = 0; replywaits < ndest; replywaits++){
DNSmsg m;
procsetname("reading %sside reply from %I: %s %s from %s",
(inns? "in": "out"), obuf, qp->dp->name,
rrname(qp->type, buf, sizeof buf), qp->req->from);
/* read udp answer into m */
if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
memmove(srcip, ibuf, IPaddrlen);
else if (!(m.flags & Ftrunc)) {
freeanswers(&m);
break; /* timed out on this dest */
} else {
/* whoops, it was truncated! ask again via tcp */
freeanswers(&m);
rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
waitms, inns, req); /* answer in m */
if (rv < 0) {
freeanswers(&m);
break; /* failed via tcp too */
}
memmove(srcip, qp->tcpip, IPaddrlen);
}
/* find responder */
// dnslog("queryns got reply from %I", srcip);
for(p = qp->dest; p < qp->curdest; p++)
if(memcmp(p->a, srcip, sizeof p->a) == 0)
break;
/* remove all addrs of responding server from list */
for(np = qp->dest; np < qp->curdest; np++)
if(np->s == p->s)
np->nx = Maxtrans;
/* free or incorporate RRs in m */
rv = procansw(qp, &m, srcip, depth, p);
if (rv > Answnone) {
free(qp->dest);
qp->dest = qp->curdest = nil; /* prevent accidents */
return rv;
}
}
}
/* if all servers returned failure, propagate it */
qp->dp->respcode = Rserver;
for(p = dest; p < qp->curdest; p++) {
destck(p);
if(p->code != Rserver)
qp->dp->respcode = Rok;
p->magic = 0; /* prevent accidents */
}
// if (qp->dp->respcode)
// dnslog("queryns setting Rserver for %s", qp->dp->name);
free(qp->dest);
qp->dest = qp->curdest = nil; /* prevent accidents */
return Answnone;
}
/*
* run a command with a supplied fd as standard input
*/
char *
system(int fd, char *cmd)
{
int pid, p, i;
static Waitmsg msg;
if((pid = fork()) == -1)
sysfatal("fork failed: %r");
else if(pid == 0){
dup(fd, 0);
close(fd);
for (i = 3; i < 200; i++)
close(i); /* don't leak fds */
execl("/bin/rc", "rc", "-c", cmd, nil);
sysfatal("exec rc: %r");
}
for(p = waitpid(); p >= 0; p = waitpid())
if(p == pid)
return msg.msg;
return "lost child";
}
/* compute wait, weighted by probability of success, with bounds */
static ulong
weight(ulong ms, unsigned pcntprob)
{
ulong wait;
wait = (ms * pcntprob) / 100;
if (wait < Minwaitms)
wait = Minwaitms;
if (wait > Maxwaitms)
wait = Maxwaitms;
return wait;
}
/*
* in principle we could use a single descriptor for a udp port
* to send all queries and receive all the answers to them,
* but we'd have to sort out the answers by dns-query id.
*/
static int
udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
{
int fd, rv;
ulong now, pcntprob;
uvlong wait, reqtm;
char *msg;
uchar *obuf, *ibuf;
static QLock mntlck;
static ulong lastmount;
fd = udpport(mntpt);
while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
/* HACK: remount /net.alt */
now = time(nil);
if (now < lastmount + Remntretry)
sleep(S2MS(lastmount + Remntretry - now));
qlock(&mntlck);
fd = udpport(mntpt); /* try again under lock */
if (fd < 0) {
dnslog("[%d] remounting /net.alt", getpid());
unmount(nil, "/net.alt");
msg = system(open("/dev/null", ORDWR), "outside");
lastmount = time(nil);
if (msg && *msg) {
dnslog("[%d] can't remount /net.alt: %s",
getpid(), msg);
sleep(10*1000); /* don't spin remounting */
} else
fd = udpport(mntpt);
}
qunlock(&mntlck);
}
if (fd < 0) {
dnslog("can't get udpport for %s query of name %s: %r",
mntpt, qp->dp->name);
sysfatal("out of udp conversations"); /* we're buggered */
}
/*
* Our QIP servers are busted and respond to AAAA and CNAME queries
* with (sometimes malformed [too short] packets and) no answers and
* just NS RRs but not Rname errors. so make time-to-wait
* proportional to estimated probability of an RR of that type existing.
*/
if (qp->type >= nelem(likely))
pcntprob = 35; /* unpopular query type */
else
pcntprob = likely[qp->type];
reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
wait = weight(reqtm / 3, pcntprob); /* time for one udp query */
qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
/* use alloced buffers rather than ones from the stack */
ibuf = emalloc(64*1024); /* max. tcp reply size */
obuf = emalloc(Maxpayload+Udphdrsize);
qp->udpfd = fd;
rv = queryns(qp, depth, ibuf, obuf, wait, inns);
close(fd);
qp->udpfd = -1;
free(obuf);
free(ibuf);
return rv;
}
/*
* look up (qp->dp->name, qp->type) rr in dns,
* using nameservers in qp->nsrp.
*/
static int
netquery(Query *qp, int depth)
{
int lock, rv, triedin, inname;
char buf[32];
RR *rp;
DN *dp;
Querylck *qlp;
static int whined;
rv = Answnone; /* pessimism */
if(depth > 12) /* in a recursive loop? */
return Answnone;
slave(qp->req);
/*
* slave might have forked. if so, the parent process longjmped to
* req->mret; we're usually the child slave, but if there are too
* many children already, we're still the same process.
*/
/*
* don't lock before call to slave so only children can block.
* just lock at top-level invocation.
*/
lock = depth <= 1 && qp->req->isslave;
dp = qp->dp; /* ensure that it doesn't change underfoot */
qlp = nil;
if(lock) {
procsetname("query lock wait: %s %s from %s", dp->name,
rrname(qp->type, buf, sizeof buf), qp->req->from);
/*
* don't make concurrent queries for this name.
* dozens of processes blocking here probably indicates
* an error in our dns data that causes us to not
* recognise a zone (area) as one of our own, thus
* causing us to query other nameservers.
*/
qlp = &dp->querylck[qtype2lck(qp->type)];
qlock(qlp);
if (qlp->Ref.ref > Maxoutstanding) {
qunlock(qlp);
if (!whined) {
whined = 1;
dnslog("too many outstanding queries for %s;"
" dropping this one; no further logging"
" of drops", dp->name);
}
return 0;
}
++qlp->Ref.ref;
qunlock(qlp);
}
procsetname("netquery: %s", dp->name);
/* prepare server RR's for incremental lookup */
for(rp = qp->nsrp; rp; rp = rp->next)
rp->marker = 0;
triedin = 0;
/*
* normal resolvers and servers will just use mntpt for all addresses,
* even on the outside. straddling servers will use mntpt (/net)
* for inside addresses and /net.alt for outside addresses,
* thus bypassing other inside nameservers.
*/
inname = insideaddr(dp->name);
if (!cfg.straddle || inname) {
rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
triedin = 1;
}
/*
* if we're still looking, are inside, and have an outside domain,
* try it on our outside interface, if any.
*/
if (rv == Answnone && cfg.inside && !inname) {
if (triedin)
dnslog(
"[%d] netquery: internal nameservers failed for %s; trying external",
getpid(), dp->name);
/* prepare server RR's for incremental lookup */
for(rp = qp->nsrp; rp; rp = rp->next)
rp->marker = 0;
rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
}
// if (rv == Answnone) /* could ask /net.alt/dns directly */
// askoutdns(dp, qp->type);
if(lock && qlp) {
qlock(qlp);
assert(qlp->Ref.ref > 0);
qunlock(qlp);
decref(qlp);
}
return rv;
}
int
seerootns(void)
{
int rv;
char root[] = "";
Request req;
RR *rr;
Query *qp;
memset(&req, 0, sizeof req);
req.isslave = 1;
req.aborttime = timems() + Maxreqtm;
req.from = "internal";
qp = emalloc(sizeof *qp);
queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
for (rr = qp->nsrp; rr != nil; rr = rr->next) /* DEBUG */
dnslog("seerootns query nsrp: %R", rr);
rv = netquery(qp, 0); /* lookup ". ns" using qp->nsrp */
rrfreelist(qp->nsrp);
querydestroy(qp);
free(qp);
return rv;
}
|