/*
* file.c
*/
/*
* mpage: a program to reduce pages of print so that several pages
* of output appear on one printed page.
*
* Copyright (c) 1994-2004 Marcel J.E. Mol, The Netherlands
* Copyright (c) 1988 Mark P. Hahn, Herndon, Virginia
*
* Permission is granted to anyone to make or distribute verbatim
* copies of this document as received, in any medium, provided
* that this copyright notice is preserved, and that the
* distributor grants the recipient permission for further
* redistribution as permitted by this notice.
*
*/
#include "mpage.h"
/* Forward declaration: looks_utf8() is defined further down in this file. */
static int looks_utf8(FILE *fp);
/*
 * do_file converts one named file into PostScript on outfd.
 *
 * The file is opened once to decide how it must be treated (plain text
 * or PostScript, and for text whether it looks like UTF-8), closed, and
 * then handed to the matching converter:
 *   - with the pr option set, do_pr_file() runs it through pr(1);
 *   - otherwise it is reopened and passed to do_text_doc() or
 *     do_ps_doc().
 *
 * fname   path of the input file
 * asheet  sheet layout, passed through to the converter
 * outfd   stream receiving the generated output
 *
 * If the file cannot be opened a message is written to stderr and the
 * function returns without producing output.
 */
void
do_file(fname, asheet, outfd)
 char *fname;
 struct sheet *asheet;
 FILE *outfd;
{
    FILE *in;
    int kind = IN_ASCII;    /* text unless the PostScript check says otherwise */

    /*
     * First pass: open the file only to find out what it contains.
     */
    in = fopen(fname, "r");
    if (in == NULL) {
        fprintf(stderr, "%s: cannot open %s\n", MPAGE, fname);
        perror(MPAGE);
        return;
    }

    /*
     * With the pr option, or when text input was requested explicitly,
     * the file is taken to be text; otherwise look for the customary
     * characters that flag a PostScript file.
     */
    if (!opt_pr && opt_input != IN_ASCII && ps_check(in))
        kind = IN_PS;

    /*
     * For text input, check whether the contents are UTF-8.
     */
    if (kind == IN_ASCII && check_utf8 && looks_utf8(in))
        use_utf8 = 1;

    (void) fclose(in);

    if (opt_pr) {
        do_pr_file(fname, asheet, outfd);
        return;
    }

    /*
     * Not using pr(1): reopen the file and run it through the
     * converter that matches its type.
     */
    in = fopen(fname, "r");
    if (in == NULL) {
        fprintf(stderr, "%s: cannot open %s\n", MPAGE, fname);
        perror(MPAGE);
        return;
    }

    if (kind == IN_ASCII) {
        do_text_doc(in, asheet, outfd, fname);
    }
    else if (kind == IN_PS) {
        do_ps_doc(in, asheet, outfd, fname);
    }

    (void) fclose(in);
    return;
} /* do_file */
/*
 * do_pr_file processes one text file into PostScript, but first runs the
 * file through pr(1).
 *
 * fname   path of the input file; a leading '-' is protected with "--"
 * asheet  sheet layout; its page length and column width are passed to pr
 * outfd   stream receiving the generated output
 *
 * The pr command line is assembled in a fixed-size buffer.  If it does not
 * fit, an error is reported and the file is skipped instead of overflowing
 * the buffer.
 *
 * NOTE(review): fname and opt_header are interpolated into a shell command
 * without escaping, so shell metacharacters in either one are interpreted
 * by the shell that popen() starts.
 */
void
do_pr_file(fname, asheet, outfd)
 char *fname;
 struct sheet *asheet;
 FILE *outfd;
{
    FILE *fd;
    char command[LINESIZE];
    int len;

    /*
     * build the proper command based upon a specified
     * header or not
     */
#define DASHES "-- "
    if (opt_header != NULL)
        len = snprintf(command, sizeof command,
                       "%s -l%d -w%d -h \"%s\" %s%s", prprog,
                       asheet->sh_plength, asheet->sh_cwidth, opt_header,
                       fname[0] == '-' ? DASHES : "", fname);
    else
        len = snprintf(command, sizeof command,
                       "%s -l%d -w%d %s%s", prprog,
                       asheet->sh_plength, asheet->sh_cwidth,
                       fname[0] == '-' ? DASHES : "", fname);

    /* a truncated command line must never be handed to the shell */
    if (len < 0 || (size_t)len >= sizeof command) {
        fprintf(stderr, "%s: command line for %s is too long\n",
                MPAGE, fname);
        return;
    }

    /*
     * open a pipe to the proper pr(1) command, and pr provides
     * us with the input
     */
    if ((fd = popen(command, "r")) == NULL) {
        fprintf(stderr, "%s: cannot create pipe for '%s'\n", MPAGE, command);
        perror(MPAGE);
    }
    else {
        do_text_doc(fd, asheet, outfd, fname);
        (void)pclose(fd);
    }

    return;
} /* do_pr_file */
#ifdef PLAN9
/*
 * Replacement mkstemp() for Plan 9 builds.
 *
 * Makes up to 20 attempts: each one copies the template, lets mktemp()
 * fill it in and tries to creat() the resulting name with mode 0666.
 * On success the chosen name is copied back into template and the file
 * descriptor is returned; otherwise -1 is returned.
 */
int
mkstemp(char *template)
{
    char *candidate;
    int attempt;
    int fd = -1;

    candidate = strdup(template);
    if (candidate == NULL)
        return -1;

    for (attempt = 0; attempt < 20 && fd < 0; attempt++) {
        /* start every attempt from the pristine template */
        strcpy(candidate, template);
        mktemp(candidate);
        fd = creat(candidate, 0666);
    }

    if (fd >= 0)
        strcpy(template, candidate);
    free(candidate);

    return fd >= 0 ? fd : -1;
}
#endif
/*
 * do_stdin processes the standard input.
 *
 * The input is first copied to a temporary file, which is then handled by
 * do_file() exactly as if its name had been given on the command line;
 * the temporary file is removed afterwards.
 *
 * asheet  sheet layout, passed through to do_file()
 * outfd   stream receiving the generated output
 *
 * On any failure to create or fill the temporary file a message is written
 * to stderr, the temporary file (if any) is removed and nothing is
 * converted.
 *
 * The #else branch holds the previous implementation; it is not compiled.
 */
void
do_stdin(asheet, outfd)
 struct sheet *asheet;
 FILE *outfd;
{
#if 1
    FILE *fd;
    char buffer[LINESIZE];
    char tmpname[LINESIZE];
    size_t incnt, outcnt;
    int tmpfd;
    int write_failed;

    /*
     * Now that the utf8 patch is in we always create a temporary file.
     * So just create a temp file and continue as if a filename was
     * passed.  This has a minor effect on the output pages: they no
     * longer show <stdin> but the temporary file name.
     */
    (void) strcpy(tmpname, "/tmp/mpage-stdin-XXXXXX");
    if ((tmpfd = mkstemp(tmpname)) == -1) {
        fprintf(stderr, "%s: cannot create temporary file\n", MPAGE);
        perror(MPAGE);
        return;
    }

    /*
     * Wrap the descriptor mkstemp() returned instead of closing it and
     * reopening the file by name, so the stream is guaranteed to refer
     * to the file that was just created.
     */
    if ((fd = fdopen(tmpfd, "w")) == NULL) {
        fprintf(stderr, "%s: cannot reopen temporary file\n", MPAGE);
        perror(MPAGE);
        (void) close(tmpfd);
        (void) unlink(tmpname);
        return;
    }

    /*
     * copy stdin to the temporary file; stop at end of input or at the
     * first short write
     */
    do {
        incnt = fread(buffer, 1, sizeof buffer, stdin);
        outcnt = fwrite(buffer, 1, incnt, fd);
    } while (incnt > 0 && outcnt == incnt);

    /* buffered data is only known to be written once fclose() succeeds */
    write_failed = ferror(fd);
    if (fclose(fd) != 0)
        write_failed = 1;
    if (write_failed) {
        fprintf(stderr, "%s: error writing temporary file %s\n",
                MPAGE, tmpname);
        (void) unlink(tmpname);
        return;
    }

    do_file(tmpname, asheet, outfd);

    (void) unlink(tmpname);

    return;

#else
    FILE *fd;
    char command[LINESIZE];
    char tmpfile[LINESIZE];
    char buffer[LINESIZE];
    int incnt, outcnt;
    int tmpfd;

    if (opt_pr) {
        Debug(DB_STDIN, "%%do_stdin: pr option selects text\n", 0);
        /*
         * if pr(1) is to be used we need to read the input
         * and pass it to a pr(1) command which will write
         * a temporary file; this temporary file will then
         * be used as input to the do_doc routine
         */
        (void)strcpy(tmpfile, "/tmp/mpageXXXXXX");
        if ( (tmpfd = mkstemp(tmpfile)) == -1) {
            fprintf(stderr, "%s: cannot create temporary file", MPAGE);
            perror(MPAGE);
            return;
        }
        close(tmpfd);
        if (opt_header != NULL)
            (void)sprintf(command, "%s -l%d -w%d -h \"%s\" > %s", prprog,
                          asheet->sh_plength, asheet->sh_cwidth,
                          opt_header, tmpfile);
        else
            (void)sprintf(command, "%s -l%d -w%d > %s", prprog,
                          asheet->sh_plength, asheet->sh_cwidth, tmpfile);
        /*
         * open a pipe to the pr(1) command which will create a
         * temporary file for converting into PS
         */
        if ((fd = popen(command, "w")) == NULL) {
            fprintf(stderr, "%s: cannot create pipe for '%s'\n",
                    MPAGE, command);
            perror(MPAGE);
            return;
        }
#ifdef DEBUG
        errno = 0;
        Debug(DB_STDIN, "%% sizeof buffer == %d\n", sizeof buffer);
#endif
        /*
         * read input to mpage and pass it onto the pr(1) command
         */
        do {
            incnt = fread(buffer, 1, sizeof buffer, stdin);
            outcnt = fwrite(buffer, 1, incnt, fd);
            Debug(DB_STDIN, "%% incnt == %d,", incnt);
            Debug(DB_STDIN, " outcnt == %d,", outcnt);
            Debug(DB_STDIN, " errno == %d\n", errno);
        } while (incnt && outcnt);
        Debug(DB_STDIN, "%% Done with while\n", 0);
        (void)pclose(fd);
        Debug(DB_STDIN, "%% closed pipe, looking for tmpfile\n", 0);
        /*
         * now open the temporary file and use do_doc to
         * convert it to PS
         */
        if ((fd = fopen(tmpfile, "r")) == NULL) {
            fprintf(stderr, "%s: cannot open %s\n", MPAGE, tmpfile);
            perror(MPAGE);
        }
        else {
            /*
             * check if the input is UTF-8 or not
             */
            if (looks_utf8 (fd))
                use_utf8 = 1;
            Debug(DB_STDIN, "%% got tmpfile, now do_doc\n", 0);
            do_text_doc(fd, asheet, outfd, command);
            (void)fclose(fd);
        }
        /*
         * tidy up by removing our temp file
         */
        Debug(DB_STDIN, "%% now remove '%s'\n", tmpfile);
        (void)unlink(tmpfile);
    }
    else {
        FILE *tfd;
        int dont_close = 0;

        /*
         * store the input to the temporary file to guess encoding correctly
         */
        (void)strcpy(tmpfile, "/tmp/mpageXXXXXX");
        if ( (tmpfd = mkstemp(tmpfile)) == -1) {
            fprintf(stderr, "%s: cannot create temporary file", MPAGE);
            tmpfile[0] = 0;
        }
        close(tmpfd);
        if (tmpfile[0] && (tfd = fopen (tmpfile, "w"))) {
            do {
                incnt = fread(buffer, 1, sizeof buffer, stdin);
                outcnt = fwrite(buffer, 1, incnt, tfd);
            } while (incnt && outcnt);
            fclose (tfd);
            if ((fd = fopen(tmpfile, "r")) == NULL) {
                fprintf(stderr, "%s: cannot open %s\n", MPAGE, tmpfile);
                perror(MPAGE);
                /* the input from stdin has already been consumed,
                 * so there is probably no way to recover
                 */
                return;
            }
        } else {
            /* try to use stdin */
            fd = stdin;
            dont_close = 1;
        }
        /*
         * check whether the input is UTF-8 or not.
         */
        if (looks_utf8 (fd))
            use_utf8 = 1;

        /*
         * check for the customary flag at the start of postscript files
         */
        if (ps_check(fd)) {
            /*
             * found the flag signaling PS input
             */
            Debug(DB_STDIN, "%%do_stdin: is postscript\n", 0);
            do_ps_doc(fd, asheet, outfd, "stdin");
        }
        else {
            /*
             * no postscript flag, print the ascii text
             */
            Debug(DB_STDIN, "%%do_stdin: not postscript\n", 0);
            do_text_doc(fd, asheet, outfd, "stdin");
        }
        if (!dont_close)
            fclose (fd);
        /* remove the temporary file */
        if (tmpfile[0])
            (void)unlink(tmpfile);
    }

    return;
#endif
} /* do_stdin */
/*
 * iswanted() tells whether sheet number sn has to be printed.
 *
 * Without any page ranges (opt_jarg is zero) every sheet is wanted.
 * Otherwise sn is wanted when, for at least one range i, it lies within
 * opt_first[i] .. opt_last[i] and, if opt_alt[i] is greater than 1, its
 * distance from opt_first[i] is a multiple of opt_alt[i].
 *
 * Returns 1 for a wanted sheet (and counts it in ps_outpages),
 * 0 otherwise.
 */
int
iswanted(int sn)
{
    int range;

    Debug(DB_STDIN, "%%iswanted: opt_jarg: %d\n", opt_jarg);
    Debug(DB_STDIN, "%%iswanted: sn: %d\n", sn);

    /* no ranges given: everything is printed */
    if (opt_jarg == 0) {
        Debug(DB_STDIN, "%%iswanted: wanted page %d\n", sn);
        ps_outpages++;
        return 1;
    }

    for (range = 0; range < opt_jarg; range++) {
        Debug(DB_STDIN, "%%iswanted: i: %d\n", range);
        Debug(DB_STDIN, "%%iswanted: opt_first[i]: %d\n", opt_first[range]);
        Debug(DB_STDIN, "%%iswanted: opt_alt[i]: %d\n", opt_alt[range]);
        Debug(DB_STDIN, "%%iswanted: opt_last[i]: %d\n", opt_last[range]);

        /* outside this range altogether */
        if (sn < opt_first[range] || sn > opt_last[range]) {
            continue;
        }
        /* inside the range, but skipped by the step */
        if (opt_alt[range] > 1 &&
            (sn - opt_first[range]) % opt_alt[range] != 0) {
            continue;
        }

        Debug(DB_STDIN, "%%iswanted: wanted page %d\n", sn);
        ps_outpages++;
        return 1;
    }

    Debug(DB_STDIN, "%%iswanted: unwanted page %d\n", sn);
    return 0;
} /* iswanted */
/*
 * do_sheets() is called from do_xxx_doc() to render the sheets;
 * it does sheet selection and reversal.
 *
 * sheetfunc  renders one sheet from inf to the stream it is given and
 *            returns FILE_EOF when the input is exhausted
 * inf        input stream
 * asheet     sheet layout, passed through to sheetfunc
 * outf       stream receiving the selected sheets
 *
 * Sheets rejected by iswanted() are rendered to /dev/null.  With
 * opt_reverse set, the selected sheets are first written to a temporary
 * file together with their start offsets and then copied to outf last
 * sheet first.
 *
 * Failure to obtain the temporary file, the offset table, or a needed
 * /dev/null stream terminates the program with exit(1).
 */
void
do_sheets(sheetfunc, inf, asheet, outf)
 int (*sheetfunc)();
 FILE *inf;
 struct sheet *asheet;
 FILE *outf;
{
    FILE *nullf = NULL;
    register int sheetno;
    int max_opt_last;

    /* the highest requested sheet bounds the work; 0 means "no limit" */
    max_opt_last = 0;
    for (sheetno = 0; sheetno < opt_jarg; sheetno++)
        if (max_opt_last < opt_last[sheetno])
            max_opt_last = opt_last[sheetno];
    if (max_opt_last == 0)
        max_opt_last = MAXINT;

    Debug(DB_STDIN, "%%do_sheets: max_opt_last: %d\n", max_opt_last);

    nullf = fopen("/dev/null", "w");
    /*
     * The sink is only written to when page ranges were given: without
     * them iswanted() accepts every sheet.  Never pass a NULL stream to
     * the sheet renderer.
     */
    if (nullf == NULL && opt_jarg != 0) {
        fprintf(stderr, "%s: cannot open /dev/null\n", MPAGE);
        perror(MPAGE);
        exit(1);
    }

    if (opt_reverse) {
        FILE *revf;
        long *pagebase;     /* pagebase[k] = offset in revf where sheet k+1 starts */
        int pageroom;       /* number of entries allocated in pagebase */
        int failed = 0;

        revf = tmpfile();
        if (revf == NULL) {
            fprintf(stderr, "%s: can't create temporary file\n", MPAGE);
            exit(1);
        }
        pageroom = 50;
        pagebase = (long *)malloc(pageroom * sizeof(long));
        if (pagebase == NULL) {
            fprintf(stderr, "%s: can't malloc 50 words\n", MPAGE);
            exit(1);
        }
        pagebase[0] = 0;

        for (sheetno = 1; sheetno <= max_opt_last; ) {
            if ((*sheetfunc)(inf, asheet, iswanted(sheetno) ? revf : nullf)
                == FILE_EOF)
                break;
            if (ferror(revf))
                break;
            pagebase[sheetno++] = ftell(revf);
            /* keep one spare slot: the end offset is stored after the loop */
            if (sheetno >= pageroom) {
                long *grown;

                pageroom *= 4;
                grown = (long *)realloc(pagebase, pageroom * sizeof(long));
                if (grown == NULL) {
                    fprintf(stderr, "%s: can't malloc %d words\n",
                            MPAGE, pageroom);
                    exit(1);
                }
                pagebase = grown;
            }
        }

        if (ferror(revf))
            fprintf(stderr, "%s: error writing to temporary file\n", MPAGE);
        else {
            pagebase[sheetno] = ftell(revf);
            rewind(revf);
            /* copy the stored segments to outf, last one first */
            while (!failed && --sheetno >= 0) {
                long remaining;
                size_t n;
                char buf[BUFSIZ];

                fseek(revf, pagebase[sheetno], SEEK_SET);
                for (remaining = pagebase[sheetno+1] - pagebase[sheetno];
                     remaining > 0; remaining -= (long)n) {
                    n = remaining < (long)BUFSIZ ? (size_t)remaining
                                                 : (size_t)BUFSIZ;
                    if (fread(buf, n, 1, revf) != 1) {
                        fprintf(stderr, "%s: Premature EOF on temp file\n",
                                MPAGE);
                        break;
                    }
                    if (fwrite(buf, n, 1, outf) != 1) {
                        fprintf(stderr, "%s: error writing output\n", MPAGE);
                        failed = 1;
                        break;
                    }
                }
            }
        }
        fclose(revf);
        free(pagebase);
    }
    else {
        /* Normal, non-reversed pages */
        sheetno = 1;
        while (sheetno <= max_opt_last &&
               (*sheetfunc)(inf, asheet, iswanted(sheetno) ?
                            outf : nullf) != FILE_EOF)
            sheetno++;
    }

    if (nullf)
        fclose(nullf);

    return;
} /* do_sheets */
/*
* The code below is taken from ascmagic.c in file-4.02.
* The looks_utf8() function has been modified to work on a file handle directly.
*/
/*
* This table reflects a particular philosophy about what constitutes
* "text," and there is room for disagreement about it.
*
* Version 3.31 of the file command considered a file to be ASCII if
* each of its characters was approved by either the isascii() or
* isalpha() function. On most systems, this would mean that any
* file consisting only of characters in the range 0x00 ... 0x7F
* would be called ASCII text, but many systems might reasonably
* consider some characters outside this range to be alphabetic,
* so the file command would call such characters ASCII. It might
* have been more accurate to call this "considered textual on the
* local system" than "ASCII."
*
* It considered a file to be "International language text" if each
* of its characters was either an ASCII printing character (according
* to the real ASCII standard, not the above test), a character in
* the range 0x80 ... 0xFF, or one of the following control characters:
* backspace, tab, line feed, vertical tab, form feed, carriage return,
* escape. No attempt was made to determine the language in which files
* of this type were written.
*
*
* The table below considers a file to be ASCII if all of its characters
* are either ASCII printing characters (again, according to the X3.4
* standard, not isascii()) or any of the following controls: bell,
* backspace, tab, line feed, form feed, carriage return, esc, nextline.
*
* I include bell because some programs (particularly shell scripts)
* use it literally, even though it is rare in normal text. I exclude
* vertical tab because it never seems to be used in real text. I also
* include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
* because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
* character to. It might be more appropriate to include it in the 8859
* set instead of the ASCII set, but it's got to be included in *something*
* we recognize or EBCDIC files aren't going to be considered textual.
* Some old Unix source files use SO/SI (^N/^O) to shift between Greek
* and Latin characters, so these should possibly be allowed. But they
* make a real mess on VT100-style displays if they're not paired properly,
* so we are probably better off not calling them text.
*
* A file is considered to be ISO-8859 text if its characters are all
* either ASCII, according to the above definition, or printing characters
* from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
*
* Finally, a file is considered to be international text from some other
* character code if its characters are all either ISO-8859 (according to
* the above definition) or characters in the range 0x80 ... 0x9F, which
* ISO-8859 considers to be control characters but the IBM PC and Macintosh
* consider to be printing characters.
*/
#define F 0 /* character never appears in text */
#define T 1 /* character appears in plain ASCII text */
#define I 2 /* character appears in ISO-8859 text */
#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */

/* Classification of every byte value, indexed by the byte itself. */
static const char text_chars[256] = {
    /*                  BEL BS HT LF    FF CR    */
    F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
    /*                              ESC          */
    F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
    /*            NEL                            */
    X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I  /* 0xfX */
};

/*
 * looks_utf8() inspects the data between the current position of fp and
 * its end and reports whether it looks like UTF-8 text.
 *
 * Returns 1 when every byte is either an approved ASCII text character
 * (see text_chars) or part of a well-formed multi-byte sequence, and at
 * least one complete multi-byte sequence was seen.  Returns 0 otherwise:
 * pure 7-bit input, a disallowed control character, a malformed sequence,
 * an empty or non-seekable stream, or an allocation failure.  A sequence
 * cut off by the end of the data is ignored rather than rejected.
 *
 * The stream position is restored to where it was on entry.
 */
static int
looks_utf8(FILE *fp)
{
    long start, end;
    unsigned char *buf;
    size_t nbytes, nread, i;
    int n, following;
    int gotone = 0;
    int result = 0;

    /* memorize current position; a stream that cannot tell cannot be scanned */
    start = ftell(fp);
    if (start < 0)
        return 0;

    /* check the input size */
    if (fseek(fp, 0L, SEEK_END) != 0)
        return 0;
    end = ftell(fp);

    /* go back to where we started: offset first, origin last */
    if (fseek(fp, start, SEEK_SET) != 0)
        return 0;
    if (end <= start)
        return 0;
    nbytes = (size_t)(end - start);

    buf = (unsigned char *)malloc(nbytes);
    if (buf == NULL)
        return 0;

    /* read data; only the bytes actually delivered are examined */
    nread = fread(buf, 1, nbytes, fp);

    /* rewind the position again */
    (void)fseek(fp, start, SEEK_SET);

    for (i = 0; i < nread; i++) {
        unsigned char ch = buf[i];

        if ((ch & 0x80) == 0) {         /* 0xxxxxxx is plain ASCII */
            /*
             * Even if the whole file is valid UTF-8 sequences,
             * still reject it if it uses weird control characters.
             */
            if (text_chars[ch] != T)
                goto done;
        } else if ((ch & 0x40) == 0) {  /* 10xxxxxx never 1st byte */
            goto done;
        } else {                        /* 11xxxxxx begins UTF-8 */
            if ((ch & 0x20) == 0)       /* 110xxxxx */
                following = 1;
            else if ((ch & 0x10) == 0)  /* 1110xxxx */
                following = 2;
            else if ((ch & 0x08) == 0)  /* 11110xxx */
                following = 3;
            else if ((ch & 0x04) == 0)  /* 111110xx */
                following = 4;
            else if ((ch & 0x02) == 0)  /* 1111110x */
                following = 5;
            else
                goto done;

            for (n = 0; n < following; n++) {
                i++;
                if (i >= nread) {
                    /* sequence truncated by end of data: judge what came before */
                    result = gotone;
                    goto done;
                }
                /* every continuation byte must be 10xxxxxx */
                if ((buf[i] & 0xc0) != 0x80)
                    goto done;
            }
            gotone = 1;
        }
    }
    result = gotone;    /* don't claim it's UTF-8 if it's all 7-bit */

done:
    free(buf);
    return result;
}
|