/* Parse a string, yielding a struct partime that describes it. */
/* Copyright 1993, 1994, 1995, 1997 Paul Eggert
Distributed under license by the Free Software Foundation, Inc.
This file is part of RCS.
RCS is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
RCS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RCS; see the file COPYING.
If not, write to the Free Software Foundation,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Report problems and direct all questions to:
[email protected]
*/
#if has_conf_h
# include <conf.h>
#else
# if HAVE_CONFIG_H
# include <config.h>
# else
# ifndef __STDC__
# define const
# endif
# endif
# if HAVE_LIMITS_H
# include <limits.h>
# endif
# ifndef LONG_MIN
# define LONG_MIN (-1-2147483647L)
# endif
# if STDC_HEADERS
# include <stdlib.h>
# endif
# include <time.h>
# ifdef __STDC__
# define P(x) x
# else
# define P(x) ()
# endif
#endif
#include <ctype.h>
/* CTYPE_DOMAIN (C) is nonzero if C is considered safe to hand to the
   <ctype.h> classification functions wrapped below.  When STDC_HEADERS
   is set every value is accepted; otherwise only values in the range
   0 through 0177 are.  */
#if STDC_HEADERS
# define CTYPE_DOMAIN(c) 1
#else
# define CTYPE_DOMAIN(c) ((unsigned) (c) <= 0177)
#endif
/* Wrappers that consult CTYPE_DOMAIN before calling the <ctype.h>
   function of the corresponding name.  C can be evaluated more than once.  */
#define ISALNUM(c) (CTYPE_DOMAIN (c) && isalnum (c))
#define ISALPHA(c) (CTYPE_DOMAIN (c) && isalpha (c))
#define ISSPACE(c) (CTYPE_DOMAIN (c) && isspace (c))
#define ISUPPER(c) (CTYPE_DOMAIN (c) && isupper (c))
/* ISDIGIT does not use <ctype.h>: it is true when C - '0', taken as an
   unsigned value, is at most 9, i.e. when C is one of '0' through '9'.  */
#define ISDIGIT(c) ((unsigned) (c) - '0' <= 9)
#include <partime.h>
/* Version identification string for this file, in RCS `$Id$' keyword form.  */
char const partimeId[] =
"$Id: partime.c,v 5.16 1997/05/19 06:33:53 eggert Exp $";
/* Lookup tables for names of months, weekdays, time zones. */
/* Maximum number of characters of a name that are stored and compared.  */
#define NAME_LENGTH_MAXIMUM 4
/* One table entry: a lowercase name and the value it stands for.
   A name that is exactly NAME_LENGTH_MAXIMUM characters long fills the
   array and therefore has no terminating NUL; `lookup' stops comparing
   at NAME_LENGTH_MAXIMUM for that reason.  Each table ends with an entry
   whose name is empty.  */
struct name_val
{
char name[NAME_LENGTH_MAXIMUM];
int val;
};
/* Forward declarations of the static helpers defined later in this file.
   The parameter lists are wrapped in P (( )) so that the macro P decides
   whether they are kept as prototypes or replaced by empty parentheses.  */
static char const *parse_decimal P ((char const *, int, int, int, int, int *, int *));
static char const *parse_fixed P ((char const *, int, int *));
static char const *parse_pattern_letter P ((char const *, int, struct partime *));
static char const *parse_prefix P ((char const *, struct partime *, int *));
static char const *parse_ranged P ((char const *, int, int, int, int *));
static int lookup P ((char const *, struct name_val const[]));
static int merge_partime P ((struct partime *, struct partime const *));
static void undefine P ((struct partime *));
/* Three-letter lowercase month names mapped to origin-0 month numbers
   (0 = January ... 11 = December).  The final entry, whose name is empty,
   ends the table and supplies TM_UNDEFINED as the "no match" value.  */
static struct name_val const month_names[] =
{
{"jan", 0},
{"feb", 1},
{"mar", 2},
{"apr", 3},
{"may", 4},
{"jun", 5},
{"jul", 6},
{"aug", 7},
{"sep", 8},
{"oct", 9},
{"nov", 10},
{"dec", 11},
{"", TM_UNDEFINED}
};
/* Three-letter lowercase weekday names mapped to weekday numbers
   (0 = Sunday ... 6 = Saturday).  The final entry, whose name is empty,
   ends the table and supplies TM_UNDEFINED as the "no match" value.  */
static struct name_val const weekday_names[] =
{
{"sun", 0},
{"mon", 1},
{"tue", 2},
{"wed", 3},
{"thu", 4},
{"fri", 5},
{"sat", 6},
{"", TM_UNDEFINED}
};
/* Helpers for writing the time zone table.
   hr60nonnegative (T) converts a nonnegative offset written as the decimal
   number hhmm into minutes; hr60 (T) does the same for an offset of either
   sign.  zs (T, S) yields one table entry for the zone named S at offset T.
   zd (T, S, D) yields two entries: S at offset T, and D (the daylight
   saving name) at T+100, i.e. one hour further east.  */
#define hr60nonnegative(t) ((t)/100 * 60 + (t)%100)
#define hr60(t) ((t)<0 ? -hr60nonnegative(-(t)) : hr60nonnegative(t))
#define zs(t,s) {s, hr60(t)}
#define zd(t,s,d) zs(t, s), zs((t)+100, d)
/* Time zone names mapped to offsets in minutes east of UTC.
   Two values are reserved as markers and are tested for by `parzone':
   1 marks "lt" (local time) and -1, in the terminating entry with the
   empty name, means that no zone name matched.
   `lookup' returns the first entry whose name is a prefix of the input,
   so the order of the entries matters.  */
static struct name_val const zone_names[] =
{
zs (-1000, "hst"), /* Hawaii */
zd (-1000, "hast", "hadt"), /* Hawaii-Aleutian */
zd (- 900, "akst", "akdt"), /* Alaska */
zd (- 800, "pst" , "pdt" ), /* Pacific */
zd (- 700, "mst" , "mdt" ), /* Mountain */
zd (- 600, "cst" , "cdt" ), /* Central */
zd (- 500, "est" , "edt" ), /* Eastern */
zd (- 400, "ast" , "adt" ), /* Atlantic */
zd (- 330, "nst" , "ndt" ), /* Newfoundland */
zs ( 000, "utc" ), /* Coordinated Universal */
zs ( 000, "uct" ), /* Coordinated Universal (alternate spelling) */
zs ( 000, "cut" ), /* Coordinated Universal (alternate spelling) */
zs ( 000, "ut"), /* Universal */
zs ( 000, "z"), /* Zulu (required by ISO 8601) */
zd ( 000, "gmt" , "bst" ), /* Greenwich Mean, British Summer */
zd ( 000, "wet" , "west"), /* Western European */
zd ( 100, "cet" , "cest"), /* Central European */
zd ( 100, "met" , "mest"), /* Middle European (bug in old tz versions) */
zd ( 100, "mez" , "mesz"), /* Mittel-Europaeische Zeit */
zd ( 200, "eet" , "eest"), /* Eastern European */
zs ( 530, "ist" ), /* India */
zd ( 900, "jst" , "jdt" ), /* Japan */
zd ( 900, "kst" , "kdt" ), /* Korea */
zd ( 1200, "nzst", "nzdt"), /* New Zealand */
/* Local time: the value 1 is a marker, not an offset.  */
{"lt", 1},
#if 0
/* The following names are duplicates or are not well attested.
There are lots more where these came from. */
zs (-1100, "sst" ), /* Samoan */
zd (- 900, "yst" , "ydt" ), /* Yukon - name is no longer used */
zd (- 500, "ast" , "adt" ), /* Acre */
zd (- 400, "wst" , "wdt" ), /* Western Brazil */
zd (- 400, "cst" , "cdt" ), /* Chile */
zd (- 200, "fst" , "fdt" ), /* Fernando de Noronha */
zs ( 000, "wat" ), /* West African */
zs ( 100, "cat" ), /* Central African */
zs ( 200, "sat" ), /* South African */
zd ( 200, "ist" , "idt" ), /* Israel */
zs ( 300, "eat" ), /* East African */
zd ( 300, "msk" , "msd" ), /* Moscow */
zd ( 330, "ist" , "idt" ), /* Iran */
zs ( 800, "hkt" ), /* Hong Kong */
zs ( 800, "sgt" ), /* Singapore */
zd ( 800, "cst" , "cdt" ), /* China */
zd ( 800, "wst" , "wst" ), /* Western Australia */
zd ( 930, "cst" , "cst" ), /* Central Australia */
zs ( 1000, "gst" ), /* Guam */
zd ( 1000, "est" , "est" ), /* Eastern Australia */
#endif
/* Terminator: the empty name matches anything; -1 means "not a zone".  */
{"", -1}
};
/* Look for a prefix of S in TABLE, returning val for first matching entry. */
static int
lookup (s, table)
char const *s;
struct name_val const table[];
{
int j;
char buf[NAME_LENGTH_MAXIMUM];
for (j = 0; j < NAME_LENGTH_MAXIMUM; j++)
{
unsigned char c = *s++;
if (! ISALPHA (c))
{
buf[j] = '\0';
break;
}
buf[j] = ISUPPER (c) ? tolower (c) : c;
}
for (;; table++)
for (j = 0; ; j++)
if (j == NAME_LENGTH_MAXIMUM || ! table[0].name[j])
return table[0].val;
else if (buf[j] != table[0].name[j])
break;
}
/* Mark every field of *T as not yet known: the broken-down time fields,
   ymodulus and yweek become TM_UNDEFINED, and the zone becomes
   TM_UNDEFINED_ZONE.  */
static void
undefine (t)
     struct partime *t;
{
  t->yweek = TM_UNDEFINED;
  t->ymodulus = TM_UNDEFINED;

  t->tm.tm_yday = TM_UNDEFINED;
  t->tm.tm_wday = TM_UNDEFINED;
  t->tm.tm_year = TM_UNDEFINED;
  t->tm.tm_mon = TM_UNDEFINED;
  t->tm.tm_mday = TM_UNDEFINED;
  t->tm.tm_hour = TM_UNDEFINED;
  t->tm.tm_min = TM_UNDEFINED;
  t->tm.tm_sec = TM_UNDEFINED;

  t->zone = TM_UNDEFINED_ZONE;
}
/* Array of patterns to look for in a date string.
   Order is important: we look for the first matching pattern
   whose values do not contradict values that we already know about.
   Each pattern is a string of one-character codes that are matched one
   after another; see `parse_pattern_letter' below for the meaning of
   the pattern codes.  The array is terminated by a null pointer.  */
static char const *const patterns[] =
{
/* These traditional patterns must come first,
to prevent an ISO 8601 format from misinterpreting their prefixes. */
"E_n_y", "x", /* RFC 822 */
"E_n", "n_E", "n", "t:m:s_A", "t:m_A", "t_A", /* traditional */
"y/N/D$", /* traditional RCS */
/* ISO 8601:1988 formats, generalized a bit. */
"y-N-D$", "4ND$", "Y-N$",
"RND$", "-R=N$", "-R$", "--N=D$", "N=DT",
"--N$", "---D$", "DT",
"Y-d$", "4d$", "R=d$", "-d$", "dT",
"y-W-X", "yWX", "y=W",
"-r-W-X", "r-W-XT", "-rWX", "rWXT", "-W=X", "W=XT", "-W",
"-w-X", "w-XT", "---X$", "XT", "4$",
"T",
"h:m:s$", "hms$", "h:m$", "hm$", "h$", "-m:s$", "-ms$", "-m$", "--s$",
"Y", "Z",
/* End of list.  */
0
};
/* Parse an initial prefix of STR, setting *T accordingly.
Return the first character after the prefix, or 0 if it couldn't be parsed.
Start with pattern *PI; if success, set *PI to the next pattern to try.
Set *PI to -1 if we know there are no more patterns to try;
if *PI is initially negative, give up immediately. */
static char const *
parse_prefix (str, t, pi)
char const *str;
struct partime *t;
int *pi;
{
int i = *pi;
char const *pat;
unsigned char c;
if (i < 0)
return 0;
/* Remove initial noise. */
while (! ISALNUM (c = *str) && c != '-' && c != '+')
{
if (! c)
{
undefine (t);
*pi = -1;
return str;
}
str++;
}
/* Try a pattern until one succeeds. */
while ((pat = patterns[i++]) != 0)
{
char const *s = str;
undefine (t);
do
{
if (! (c = *pat++))
{
*pi = i;
return s;
}
}
while ((s = parse_pattern_letter (s, c, t)) != 0);
}
return 0;
}
/* Parse an initial prefix of S of length DIGITS; it must be a number
   consisting only of the decimal digits '0' through '9'.
   Store the parsed number into *RES.
   Return the first character after the prefix, or 0 if it wasn't parsed.
   The prefix is also rejected if its value is too large for an int.
   When 0 is returned, *RES is left untouched.
   If DIGITS is zero, store 0 and return S itself.  */
static char const *
parse_fixed (s, digits, res)
     char const *s;
     int digits, *res;
{
  /* Largest value the result may take.  It is derived from the width of
     unsigned rather than taken from <limits.h>, which this file includes
     only conditionally; it never exceeds INT_MAX.  */
  unsigned max = (unsigned) -1 >> 1;
  unsigned n = 0;
  char const *lim = s + digits;

  while (s < lim)
    {
      /* A character below '0' wraps to a huge unsigned value, so a single
         comparison rejects every non-digit.  */
      unsigned d = *s++ - '0';
      if (9 < d)
        return 0;

      /* Check before multiplying: 10 * N + D must not exceed MAX.
         Letting a signed int overflow would be undefined behaviour.  */
      if ((max - d) / 10 < n)
        return 0;
      n = 10 * n + d;
    }

  *res = (int) n;
  return s;
}
/* Parse an initial prefix of S of length DIGITS as a decimal number
   and require the value to lie in the range LO through HI inclusive.
   Store the parsed number into *RES; note that *RES is written whenever
   the digits themselves parse, even if the value is then out of range.
   Return the first character after the prefix, or 0 if it wasn't parsed
   or was out of range.  */
static char const *
parse_ranged (s, digits, lo, hi, res)
     char const *s;
     int digits, lo, hi, *res;
{
  char const *end = parse_fixed (s, digits, res);

  if (! end)
    return 0;
  if (*res < lo || hi < *res)
    return 0;
  return end;
}
/* Parse an initial prefix of S of length DIGITS;
   it must be a number in the range LO through HI
   and it may be followed by a fraction to be computed using RESOLUTION.
   A fraction is a ',' or '.' immediately followed by one or more digits.
   Store the parsed number into *RES; store the fraction times RESOLUTION,
   rounded to the nearest integer (ties go to the even integer), into *FRES.
   *FRES is set to 0 if there is no fraction.
   RESOLUTION must be positive.
   Return the first character after the prefix, or 0 if it wasn't parsed
   or if the fraction has too many digits to be computed in an int.
   When 0 is returned, *FRES is left untouched.  */
static char const *
parse_decimal (s, digits, lo, hi, resolution, res, fres)
     char const *s;
     int digits, lo, hi, resolution, *res, *fres;
{
  int f = 0;

  s = parse_fixed (s, digits, res);
  if (! (s && lo <= *res && *res <= hi))
    return 0;

  if ((s[0] == ',' || s[0] == '.') && ISDIGIT (s[1]))
    {
      /* Upper bound for the int arithmetic below.  It is derived from the
         width of unsigned rather than taken from <limits.h>, which this
         file includes only conditionally; it never exceeds INT_MAX.  */
      int int_max = (int) ((unsigned) -1 >> 1);
      char const *s1 = ++s;
      int num10 = 0, denom10 = 10, product, half, rem;

      /* Find the end of the fraction and set DENOM10 to 10 to the power
         of its length.  Every bound is checked before multiplying:
         testing after a signed overflow is undefined behaviour and may
         be removed by the compiler.  */
      while (ISDIGIT (*++s))
        {
          if (int_max / 10 < denom10)
            return 0; /* overflow */
          denom10 *= 10;
        }

      /* NUM10 / DENOM10 is the fraction; NUM10 < DENOM10 so it fits.  */
      s = parse_fixed (s1, (int) (s - s1), &num10);
      if (! s)
        return 0;

      if (resolution <= 0 || int_max / resolution < num10)
        return 0; /* overflow */
      product = num10 * resolution;

      /* Round PRODUCT / DENOM10 to nearest, ties to even, without forming
         PRODUCT + DENOM10/2, which could itself overflow.  DENOM10 is a
         power of ten, so HALF is exactly half of it.  */
      half = denom10 >> 1;
      rem = product % denom10;
      f = product / denom10;
      if (half < rem || (rem == half && (f & 1)))
        f++;
    }

  *fres = f;
  return s;
}
/* Parse an initial prefix of S; it must denote a time zone.
   The accepted forms are LT, n, n DST, nDST, no and o, where n is a
   time zone name found in `zone_names' and o is a numeric offset of the
   form [-+]hh[:mm[:ss]] (the colons may be omitted, but seconds are
   recognized only if a colon precedes the minutes).
   Set *ZONE to the number of seconds east of GMT,
   or to TM_LOCAL_ZONE if it is the local time zone.
   Return the first character after the prefix, or 0 if it wasn't parsed;
   *ZONE is left untouched in the latter case.  */
char *
parzone (s, zone)
     char const *s;
     long *zone;
{
  long base = 0;        /* seconds contributed by a zone name, if any */
  long offset;
  int hh, mm, ss;
  char sign;

  if (*s != '-' && *s != '+')
    {
      int minutes_east = lookup (s, zone_names);
      int is_dst;

      if (minutes_east == -1)
        return 0;

      /* Don't bother to check rest of spelling.  */
      while (ISALPHA ((unsigned char) *s))
        s++;

      /* The value 1 is the table's marker for LT; it takes no suffix.  */
      if (minutes_east == 1)
        {
          *zone = TM_LOCAL_ZONE;
          return (char *) s;
        }

      base = minutes_east * 60L;

      /* A name that itself ends in "DST" (the nDST form) ...  */
      is_dst = ((s[-1] == 'T' || s[-1] == 't')
                && (s[-2] == 'S' || s[-2] == 's')
                && (s[-3] == 'D' || s[-3] == 'd'));
      if (! is_dst)
        {
          /* ... or a name followed by optional space and "DST".  */
          while (ISSPACE ((unsigned char) *s))
            s++;
          if ((s[0] == 'D' || s[0] == 'd')
              && (s[1] == 'S' || s[1] == 's')
              && (s[2] == 'T' || s[2] == 't'))
            {
              s += 3;
              is_dst = 1;
            }
        }
      if (is_dst)
        {
          *zone = base + 60*60;
          return (char *) s;
        }

      /* A bare name, unless a numeric offset follows it.  */
      if (*s != '-' && *s != '+')
        {
          *zone = base;
          return (char *) s;
        }
    }

  /* Numeric offset: sign, two-digit hour, then optional minutes and
     seconds.  */
  sign = *s++;

  s = parse_ranged (s, 2, 0, 23, &hh);
  if (! s)
    return 0;

  mm = ss = 0;
  if (*s == ':')
    s++;
  if (ISDIGIT (*s))
    {
      s = parse_ranged (s, 2, 0, 59, &mm);
      if (! s)
        return 0;

      /* Seconds are taken only in the fully punctuated form hh:mm:ss.  */
      if (*s == ':' && s[-3] == ':' && ISDIGIT (s[1]))
        {
          s = parse_ranged (s + 1, 2, 0, 59, &ss);
          if (! s)
            return 0;
        }
    }

  /* Leftover digits mean that this was not a zone offset after all.  */
  if (ISDIGIT (*s))
    return 0;

  offset = (hh * 60 + mm) * 60L + ss;
  *zone = base + (sign == '-' ? -offset : offset);
  /* ?? Are fractions allowed here? If so, they're not implemented. */
  return (char *) s;
}
/* Parse an initial prefix of S, matching the pattern whose code is C.
   Set *T accordingly.
   Return the first character after the prefix, or 0 if it wasn't parsed.
   When 0 is returned, some fields of *T may already have been modified,
   so the caller must not use *T in that case.
   The individual codes are described next to their cases below.  */
static char const *
parse_pattern_letter (s, c, t)
     char const *s;
     int c;
     struct partime *t;
{
  switch (c)
    {
    case '$': /* The next character must be a non-digit. */
      if (ISDIGIT (*s))
        return 0;
      break;

    case '-':
    case '/':
    case ':':
      /* These characters stand for themselves. */
      if (*s++ != c)
        return 0;
      break;

    case '4': /* 4-digit year */
      s = parse_fixed (s, 4, &t->tm.tm_year);
      break;

    case '=': /* optional '-' */
      s += *s == '-';
      break;

    case 'A': /* AM or PM */
      /* This matches the regular expression [AaPp][Mm]?.
         It must not be followed by a letter or digit;
         otherwise it would match prefixes of strings like "PST". */
      switch (*s++)
        {
        case 'A':
        case 'a':
          /* 12 AM is hour 0.  */
          if (t->tm.tm_hour == 12)
            t->tm.tm_hour = 0;
          break;

        case 'P':
        case 'p':
          /* 12 PM stays 12; every other PM hour moves to the afternoon.  */
          if (t->tm.tm_hour != 12)
            t->tm.tm_hour += 12;
          break;

        default:
          return 0;
        }
      switch (*s)
        {
        case 'M':
        case 'm':
          s++;
          break;
        }
      if (ISALNUM ((unsigned char) *s))
        return 0;
      break;

    case 'D': /* day of month [01-31] */
      s = parse_ranged (s, 2, 1, 31, &t->tm.tm_mday);
      break;

    case 'd': /* day of year [001-366] */
      s = parse_ranged (s, 3, 1, 366, &t->tm.tm_yday);
      t->tm.tm_yday--;  /* stored with origin 0 */
      break;

    case 'E': /* extended day of month [1-9, 01-31] */
      s = parse_ranged (s, (ISDIGIT (s[0]) && ISDIGIT (s[1])) + 1, 1, 31,
                        &t->tm.tm_mday);
      break;

    case 'h': /* hour [00-23 followed by optional fraction] */
      {
        /* parse_decimal does not store a fraction when it fails, so start
           from a defined value instead of reading an uninitialized int.  */
        int frac = 0;
        s = parse_decimal (s, 2, 0, 23, 60 * 60, &t->tm.tm_hour, &frac);
        /* FRAC is the fraction of the hour, in seconds.  */
        t->tm.tm_min = frac / 60;
        t->tm.tm_sec = frac % 60;
      }
      break;

    case 'm': /* minute [00-59 followed by optional fraction] */
      s = parse_decimal (s, 2, 0, 59, 60, &t->tm.tm_min, &t->tm.tm_sec);
      break;

    case 'n': /* month name [e.g. "Jan"] */
      if (! TM_DEFINED (t->tm.tm_mon = lookup (s, month_names)))
        return 0;
      /* Don't bother to check rest of spelling. */
      while (ISALPHA ((unsigned char) *s))
        s++;
      break;

    case 'N': /* month [01-12] */
      s = parse_ranged (s, 2, 1, 12, &t->tm.tm_mon);
      t->tm.tm_mon--;  /* stored with origin 0 */
      break;

    case 'r': /* year % 10 (remainder in origin-0 decade) [0-9] */
      s = parse_fixed (s, 1, &t->tm.tm_year);
      t->ymodulus = 10;
      break;

    case_R:
    case 'R': /* year % 100 (remainder in origin-0 century) [00-99] */
      s = parse_fixed (s, 2, &t->tm.tm_year);
      t->ymodulus = 100;
      break;

    case 's': /* second [00-60 followed by optional fraction] */
      {
        /* parse_decimal does not store a fraction when it fails, so start
           from a defined value instead of reading an uninitialized int.  */
        int frac = 0;
        s = parse_decimal (s, 2, 0, 60, 1, &t->tm.tm_sec, &frac);
        /* With a resolution of 1, FRAC is the fraction rounded to 0 or 1.  */
        t->tm.tm_sec += frac;
      }
      break;

    case 'T': /* 'T' or 't' */
      switch (*s++)
        {
        case 'T':
        case 't':
          break;
        default:
          return 0;
        }
      break;

    case 't': /* traditional hour [1-9 or 01-12] */
      s = parse_ranged (s, (ISDIGIT (s[0]) && ISDIGIT (s[1])) + 1, 1, 12,
                        &t->tm.tm_hour);
      break;

    case 'w': /* 'W' or 'w' only (stands for current week) */
      switch (*s++)
        {
        case 'W':
        case 'w':
          break;
        default:
          return 0;
        }
      break;

    case 'W': /* 'W' or 'w', followed by a week of year [00-53] */
      switch (*s++)
        {
        case 'W':
        case 'w':
          break;
        default:
          return 0;
        }
      s = parse_ranged (s, 2, 0, 53, &t->yweek);
      break;

    case 'X': /* weekday (1=Mon ... 7=Sun) [1-7] */
      s = parse_ranged (s, 1, 1, 7, &t->tm.tm_wday);
      t->tm.tm_wday--;  /* stored as 0=Mon ... 6=Sun */
      break;

    case 'x': /* weekday name [e.g. "Sun"] */
      if (! TM_DEFINED (t->tm.tm_wday = lookup (s, weekday_names)))
        return 0;
      /* Don't bother to check rest of spelling. */
      while (ISALPHA ((unsigned char) *s))
        s++;
      break;

    case 'y': /* either R or Y */
      /* Exactly two digits are read as a year within the century.  */
      if (ISDIGIT (s[0]) && ISDIGIT (s[1]) && ! ISDIGIT (s[2]))
        goto case_R;
      /* fall into */
    case 'Y': /* year in full [4 or more digits] */
      {
        int len = 0;
        while (ISDIGIT (s[len]))
          len++;
        if (len < 4)
          return 0;
        s = parse_fixed (s, len, &t->tm.tm_year);
      }
      break;

    case 'Z': /* time zone */
      s = parzone (s, &t->zone);
      break;

    case '_': /* possibly empty sequence of non-alphanumerics */
      while (! ISALNUM ((unsigned char) *s) && *s)
        s++;
      break;

    default: /* bad pattern */
      return 0;
    }

  return s;
}
/* If there is no conflict, merge into *T the additional information in *U
   and return 0.  Otherwise do nothing and return -1.
   Two values conflict if both are defined and they differ.
   Merging copies every defined field of *U into *T; fields that are
   undefined in *U leave the corresponding field of *T alone.
   The weekday (tm_wday) and the day of the year (tm_yday) are independent
   fields and each is checked against, and merged from, its own
   counterpart.  */
static int
merge_partime (t, u)
     struct partime *t;
     struct partime const *u;
{
# define conflict(a,b) ((a) != (b) && TM_DEFINED (a) && TM_DEFINED (b))
  if (conflict (t->tm.tm_sec, u->tm.tm_sec)
      || conflict (t->tm.tm_min, u->tm.tm_min)
      || conflict (t->tm.tm_hour, u->tm.tm_hour)
      || conflict (t->tm.tm_mday, u->tm.tm_mday)
      || conflict (t->tm.tm_mon, u->tm.tm_mon)
      || conflict (t->tm.tm_year, u->tm.tm_year)
      || conflict (t->tm.tm_wday, u->tm.tm_wday)
      || conflict (t->tm.tm_yday, u->tm.tm_yday)
      || conflict (t->ymodulus, u->ymodulus)
      || conflict (t->yweek, u->yweek)
      || (t->zone != u->zone
          && t->zone != TM_UNDEFINED_ZONE
          && u->zone != TM_UNDEFINED_ZONE))
    return -1;
# undef conflict

  /* Nothing conflicts, so it is now safe to modify *T.  */
# define merge_(a,b) do { if (TM_DEFINED (b)) (a) = (b); } while (0)
  merge_ (t->tm.tm_sec, u->tm.tm_sec);
  merge_ (t->tm.tm_min, u->tm.tm_min);
  merge_ (t->tm.tm_hour, u->tm.tm_hour);
  merge_ (t->tm.tm_mday, u->tm.tm_mday);
  merge_ (t->tm.tm_mon, u->tm.tm_mon);
  merge_ (t->tm.tm_year, u->tm.tm_year);
  merge_ (t->tm.tm_wday, u->tm.tm_wday);
  merge_ (t->tm.tm_yday, u->tm.tm_yday);
  merge_ (t->ymodulus, u->ymodulus);
  merge_ (t->yweek, u->yweek);
# undef merge_

  /* The zone has its own "undefined" marker.  */
  if (u->zone != TM_UNDEFINED_ZONE)
    t->zone = u->zone;

  return 0;
}
/* Parse a date/time prefix of S, putting the parsed result into *T.
Return the first character after the prefix.
The prefix may contain no useful information;
in that case, *T will contain only undefined values. */
char *
partime (s, t)
char const *s;
struct partime *t;
{
struct partime p;
undefine (t);
while (*s)
{
int i = 0;
char const *s1;
do
{
if (! (s1 = parse_prefix (s, &p, &i)))
return (char *) s;
}
while (merge_partime (t, &p) != 0);
s = s1;
}
return (char *) s;
}
|