Plan 9 from Bell Labs’s /usr/web/sources/contrib/fgb/root/sys/src/cmd/aux/mpage/file.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


/*
 * file.c
 */

/*
 * mpage:    a program to reduce pages of print so that several pages
 *           of output appear on one printed page.
 *
 * Copyright (c) 1994-2004 Marcel J.E. Mol, The Netherlands
 * Copyright (c) 1988 Mark P. Hahn, Herndon, Virginia
 *  
 *     Permission is granted to anyone to make or distribute verbatim
 *     copies of this document as received, in any medium, provided
 *     that this copyright notice is preserved, and that the
 *     distributor grants the recipient permission for further
 *     redistribution as permitted by this notice.
 *
 */


#include "mpage.h"


static int looks_utf8(FILE *fp);

/*
 * do_file turns one named input file into postscript.
 *
 * The file is first opened only to classify it: postscript or plain text
 * and, for text, whether the contents look like UTF-8.  After that probe
 * the file is closed again and either handed to do_pr_file() (pr option)
 * or reopened and fed to the converter matching the detected type.
 *
 *   fname   path of the file to convert
 *   asheet  sheet description passed on to the converters
 *   outfd   output stream passed on to the converters
 */
void
do_file(fname, asheet, outfd)
 char *fname;
 struct sheet *asheet;
 FILE *outfd;
{
    FILE *in;
    int kind;

    /*
     * First pass: open the file just to find out what it contains.
     */
    in = fopen(fname, "r");
    if (in == NULL) {
        fprintf(stderr, "%s: cannot open %s\n", MPAGE, fname);
        perror(MPAGE);
        return;
    }

    /*
     * With the pr option, or when text input was requested explicitly,
     * the file is treated as text without looking at it.  Otherwise the
     * customary postscript marker decides.
     */
    kind = IN_ASCII;
    if (!opt_pr && opt_input != IN_ASCII && ps_check(in))
        kind = IN_PS;

    /*
     * Text input may additionally be flagged as UTF-8.
     */
    if (kind == IN_ASCII && check_utf8 && looks_utf8(in))
        use_utf8 = 1;

    (void) fclose(in);

    /*
     * pr(1) reads the file by name itself, so no stream is needed.
     */
    if (opt_pr) {
        do_pr_file(fname, asheet, outfd);
        return;
    }

    /*
     * Second pass: reopen the file and run it through the processor
     * selected above.
     */
    in = fopen(fname, "r");
    if (in == NULL) {
        fprintf(stderr, "%s: cannot open %s\n", MPAGE, fname);
        perror(MPAGE);
        return;
    }

    if (kind == IN_ASCII)
        do_text_doc(in, asheet, outfd, fname);
    else if (kind == IN_PS)
        do_ps_doc(in, asheet, outfd, fname);

    (void) fclose(in);

} /* do_file */



/*
 * do_pr_file processes one text file into postscript, but first runs the
 * file through pr(1).
 *
 * A shell command line for prprog is built from the sheet's page length
 * and column width, the optional header (opt_header) and the file name,
 * and the output of that command is fed to do_text_doc().
 *
 *   fname   path of the file handed to pr(1)
 *   asheet  sheet description; sh_plength and sh_cwidth size the pr output
 *   outfd   output stream passed on to do_text_doc()
 *
 * If the command does not fit into the LINESIZE buffer the file is skipped
 * with a diagnostic instead of overrunning the buffer.
 */
void
do_pr_file(fname, asheet, outfd)
 char *fname;
 struct sheet *asheet;
 FILE *outfd;
{
    FILE *fd;
    char command[LINESIZE];
    int len;

    /*
     * build the proper command based upon a specified
     * header or not.  A file name starting with '-' is preceded by
     * DASHES so that pr(1) does not take it for an option.
     *
     * NOTE(review): opt_header and fname are pasted into a shell command
     * without escaping; names containing shell metacharacters will be
     * interpreted by the shell that popen() starts.
     */
#define DASHES "-- "
    if (opt_header != NULL)
        len = snprintf(command, sizeof command,
                  "%s -l%d -w%d -h \"%s\" %s%s", prprog,
                  asheet->sh_plength, asheet->sh_cwidth, opt_header,
                  fname[0] == '-' ? DASHES : "", fname);
    else
        len = snprintf(command, sizeof command,
                  "%s -l%d -w%d %s%s", prprog,
                  asheet->sh_plength, asheet->sh_cwidth,
                  fname[0] == '-' ? DASHES : "", fname);

    /*
     * snprintf reports the length it wanted to write; anything that does
     * not fit would run a truncated (and therefore wrong) command.
     */
    if (len < 0 || (size_t)len >= sizeof command) {
        fprintf(stderr, "%s: pr command for %s is too long\n", MPAGE, fname);
        return;
    }

    /*
     * open a pipe to the proper pr(1) command, and pr provides
     * us with the input
     */
    if ((fd = popen(command, "r")) == NULL) {
        fprintf(stderr, "%s: cannot create pipe for '%s'\n", MPAGE, command);
        perror(MPAGE);
    }
    else {
        do_text_doc(fd, asheet, outfd, fname);
        (void)pclose(fd);
    }

    return;

} /* do_pr_file */

#ifdef PLAN9
/*
 * Replacement mkstemp() for the Plan 9 build.
 *
 * Works on a private copy of the template: up to 20 times the copy is
 * reset to the template, run through mktemp() and created with creat().
 * On success the chosen name is copied back into template and the open
 * descriptor is returned; otherwise -1 is returned and template is left
 * as it was.
 */
int
mkstemp(char *template)
{
   char *candidate;
   int attempt;
   int fd = -1;

   candidate = strdup(template);
   if(candidate == NULL)
       return -1;

   for(attempt = 0; attempt < 20; attempt++){
       strcpy(candidate, template);
       mktemp(candidate);
       fd = creat(candidate, 0666);
       if(fd >= 0)
           break;
   }

   if(fd < 0){
       free(candidate);
       return -1;
   }

   /* hand the name that worked back to the caller */
   strcpy(template, candidate);
   free(candidate);
   return fd;
}
#endif

/*
 * do_stdin processes the standard input.
 *
 * The input is first copied into a temporary file, which is then handled
 * by do_file() exactly as if its name had been given on the command line.
 * The copy is needed because the type and UTF-8 checks have to look at the
 * data before it is converted.  A side effect is that the output pages
 * show the temporary file name instead of <stdin>.
 *
 *   asheet  sheet description passed on to do_file()
 *   outfd   output stream passed on to do_file()
 *
 * The temporary file is removed on every path that created it.  If the
 * input cannot be copied completely, a diagnostic is printed and nothing
 * is converted.
 */
void
do_stdin(asheet, outfd)
 struct sheet *asheet;
 FILE *outfd;
{
    FILE *fd;
    char buffer[LINESIZE];
    char tmpname[LINESIZE];
    size_t incnt, outcnt;
    int tmpfd;
    int write_failed = 0;

    (void) strcpy(tmpname, "/tmp/mpage-stdin-XXXXXX");
    if ( (tmpfd = mkstemp(tmpname)) == -1) {
        fprintf(stderr, "%s: cannot create temporary file\n", MPAGE);
        perror(MPAGE);
        return;
    }
    close(tmpfd);
    if ((fd = fopen (tmpname, "w")) == NULL) {
        fprintf(stderr, "%s: cannot reopen temporary file\n", MPAGE);
        perror(MPAGE);
        /* mkstemp already created the file; do not leave it behind */
        (void) unlink(tmpname);
        return;
    }

    /*
     * Copy stdin to the temporary file.  A short write means the copy
     * is incomplete, so stop rather than carry on with partial data.
     */
    for (;;) {
        incnt = fread(buffer, 1, sizeof buffer, stdin);
        if (incnt == 0)
            break;
        outcnt = fwrite(buffer, 1, incnt, fd);
        if (outcnt != incnt) {
            write_failed = 1;
            break;
        }
    }
    /* fclose flushes, so a failure here is a write failure too */
    if (fclose(fd) != 0)
        write_failed = 1;

    if (write_failed) {
        fprintf(stderr, "%s: error writing temporary file %s\n",
                MPAGE, tmpname);
        (void) unlink(tmpname);
        return;
    }

    do_file(tmpname, asheet, outfd);

    (void) unlink(tmpname);

    return;

} /* do_stdin */



/*
 * iswanted () tells whether sheet number sn has to be printed.
 *
 * Without any page selection (opt_jarg is zero) every sheet is wanted.
 * Otherwise sn is wanted when, for at least one of the opt_jarg
 * selections, it lies between opt_first[] and opt_last[] and, if
 * opt_alt[] is larger than 1, is a whole number of opt_alt[] steps
 * away from opt_first[].
 *
 * Returns 1 and increments ps_outpages for a wanted sheet, 0 otherwise.
 */
int
iswanted(int sn)
{
    int sel;
    int selected = 0;

    Debug(DB_STDIN, "%%iswanted: opt_jarg: %d\n", opt_jarg);
    Debug(DB_STDIN, "%%iswanted: sn: %d\n", sn);

    if (opt_jarg == 0) {
        /* no selection given: everything is printed */
        selected = 1;
    }
    else {
        for (sel = 0; sel < opt_jarg; sel++) {
            Debug(DB_STDIN, "%%iswanted: i: %d\n", sel);
            Debug(DB_STDIN, "%%iswanted: opt_first[i]: %d\n", opt_first[sel]);
            Debug(DB_STDIN, "%%iswanted: opt_alt[i]: %d\n", opt_alt[sel]);
            Debug(DB_STDIN, "%%iswanted: opt_last[i]: %d\n", opt_last[sel]);

            /* outside this range altogether */
            if (sn < opt_first[sel] || sn > opt_last[sel]) {
                continue;
            }
            /* inside the range: every sheet, or every opt_alt-th one */
            if (opt_alt[sel] <= 1 ||
                (sn - opt_first[sel]) % opt_alt[sel] == 0) {
                selected = 1;
                break;
            }
        }
    }

    if (selected) {
        Debug(DB_STDIN, "%%iswanted: wanted page %d\n", sn);
        ps_outpages++;
        return 1;
    }

    Debug(DB_STDIN, "%%iswanted: unwanted page %d\n", sn);

    return 0;

} /* iswanted */



/*
 * do_sheets() is called from do_xxx_doc() to render the sheets;
 * it does sheet selection and reversal.
 *
 *   sheetfunc  renders one sheet from inf to the stream it is given and
 *              returns FILE_EOF when the input is exhausted
 *   inf        input stream handed to sheetfunc
 *   asheet     sheet description handed to sheetfunc
 *   outf       stream receiving the sheets that are wanted
 *
 * Sheets are numbered from 1.  Every sheet is rendered; sheets rejected by
 * iswanted() are rendered into /dev/null.  Rendering stops after the
 * largest opt_last[] value, or at end of input when no selection limits
 * it.  With opt_reverse the wanted sheets are first collected in a
 * temporary file, together with the offset at which each one starts, and
 * then copied to outf last sheet first.
 *
 * Failure to set up the temporary file, the offset table or (when a page
 * selection is active) the /dev/null sink terminates the program.
 */
void
do_sheets(sheetfunc, inf, asheet, outf)
    int (*sheetfunc)();
    FILE *inf;
    struct sheet *asheet;
    FILE *outf;
{
    FILE *nullf = NULL;
    register int sheetno;
    int max_opt_last;

    /* the highest sheet number any selection asks for; 0 means no limit */
    max_opt_last = 0;
    for (sheetno = 0; sheetno < opt_jarg; sheetno++)
        if (max_opt_last < opt_last[sheetno])
            max_opt_last = opt_last[sheetno];
    if (max_opt_last == 0)
        max_opt_last = MAXINT;
 
    Debug(DB_STDIN, "%%do_sheets: max_opt_last: %d\n", max_opt_last);
 
    /*
     * Sink for unwanted sheets.  It is only written to when a page
     * selection is active, so only then is a missing sink fatal.
     */
    nullf = fopen("/dev/null", "w");
    if (nullf == NULL && opt_jarg) {
        fprintf(stderr, "%s: can't open /dev/null\n", MPAGE);
        exit(1);
    }
 
    if (opt_reverse) {
        FILE *revf;
        long *pagebase;     /* pagebase[n] = offset in revf after n sheets */
        int pageroom;       /* number of entries allocated in pagebase */

        revf = tmpfile();
        if (revf == NULL) {
            fprintf(stderr, "%s: can't create temporary file\n", MPAGE);
            exit(1);
        }
        pageroom = 50;
        pagebase = (long *)malloc(pageroom * sizeof(long));
        if(pagebase == NULL) {
            fprintf(stderr, "%s: can't malloc 50 words\n", MPAGE);
            exit(1);
        }
        pagebase[0] = 0;

        for (sheetno = 1; sheetno <= max_opt_last; ) {
            if ((*sheetfunc)(inf, asheet, iswanted(sheetno) ? revf : nullf)
                  == FILE_EOF)
                break;

            if (ferror(revf))
                break;

            pagebase[sheetno++] = ftell(revf);
            /* keep room for the closing offset stored after the loop */
            if (sheetno >= pageroom) {
                long *grown;

                pageroom *= 4;
                grown = (long *)realloc(pagebase, pageroom * sizeof(long));
                if (grown == NULL) {
                    fprintf(stderr, "%s: can't malloc %d words\n",
                                    MPAGE, pageroom);
                    exit(1);
                }
                pagebase = grown;
            }
        }

        if (ferror(revf))
            fprintf(stderr, "%s: error writing to temporary file\n", MPAGE);
        else {
            /* end of whatever the last sheetfunc call wrote */
            pagebase[sheetno] = ftell(revf);
            rewind(revf);

            /* copy the byte range of each sheet, last one first */
            while (--sheetno >= 0) {
                long left;
                size_t n;
                char buf[BUFSIZ];

                fseek(revf, pagebase[sheetno], SEEK_SET);
                for (left = pagebase[sheetno+1] - pagebase[sheetno];
                     left > 0; left -= (long)n) {
                    n = left < (long)BUFSIZ ? (size_t)left : (size_t)BUFSIZ;
                    if (fread(buf, n, 1, revf) != 1) {
                        fprintf(stderr, "%s: Premature EOF on temp file\n",
                        MPAGE);
                        break;
                    }
                    (void) fwrite(buf, n, 1, outf);
                }
            }
        }
        fclose(revf);
        free(pagebase);

    }
    else {
        /* Normal, non-reversed pages */
        sheetno = 1;
        while (sheetno <= max_opt_last &&
               (*sheetfunc)(inf, asheet, iswanted(sheetno) ?
                        outf : nullf) != FILE_EOF)
            sheetno++;
    }

    if (nullf)
        fclose(nullf);

    return;

} /* do_sheets */

/*
 * The code below is derived from ascmagic.c in file-4.02.
 * The looks_utf8() function has been modified to read directly from a
 * FILE handle.
 */
/*
 * This table reflects a particular philosophy about what constitutes
 * "text," and there is room for disagreement about it.
 *
 * Version 3.31 of the file command considered a file to be ASCII if
 * each of its characters was approved by either the isascii() or
 * isalpha() function.  On most systems, this would mean that any
 * file consisting only of characters in the range 0x00 ... 0x7F
 * would be called ASCII text, but many systems might reasonably
 * consider some characters outside this range to be alphabetic,
 * so the file command would call such characters ASCII.  It might
 * have been more accurate to call this "considered textual on the
 * local system" than "ASCII."
 *
 * It considered a file to be "International language text" if each
 * of its characters was either an ASCII printing character (according
 * to the real ASCII standard, not the above test), a character in
 * the range 0x80 ... 0xFF, or one of the following control characters:
 * backspace, tab, line feed, vertical tab, form feed, carriage return,
 * escape.  No attempt was made to determine the language in which files
 * of this type were written.
 *
 *
 * The table below considers a file to be ASCII if all of its characters
 * are either ASCII printing characters (again, according to the X3.4
 * standard, not isascii()) or any of the following controls: bell,
 * backspace, tab, line feed, form feed, carriage return, esc, nextline.
 *
 * I include bell because some programs (particularly shell scripts)
 * use it literally, even though it is rare in normal text.  I exclude
 * vertical tab because it never seems to be used in real text.  I also
 * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
 * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
 * character to.  It might be more appropriate to include it in the 8859
 * set instead of the ASCII set, but it's got to be included in *something*
 * we recognize or EBCDIC files aren't going to be considered textual.
 * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
 * and Latin characters, so these should possibly be allowed.  But they
 * make a real mess on VT100-style displays if they're not paired properly,
 * so we are probably better off not calling them text.
 *
 * A file is considered to be ISO-8859 text if its characters are all
 * either ASCII, according to the above definition, or printing characters
 * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
 *
 * Finally, a file is considered to be international text from some other
 * character code if its characters are all either ISO-8859 (according to
 * the above definition) or characters in the range 0x80 ... 0x9F, which
 * ISO-8859 considers to be control characters but the IBM PC and Macintosh
 * consider to be printing characters.
 */

#define F 0   /* character never appears in text */
#define T 1   /* character appears in plain ASCII text */
#define I 2   /* character appears in ISO-8859 text */
#define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */

/*
 * Classification of every byte value, indexed by the byte itself.
 * looks_utf8() only consults the entries for 0x00 ... 0x7F.
 */
static const char text_chars[256] = {
	/*                  BEL BS HT LF    FF CR    */
	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
	/*                              ESC          */
	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
	/*            NEL                            */
	X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
};

/*
 * looks_utf8 examines everything from the current position of fp to the
 * end of the file and reports whether it looks like UTF-8 text.
 *
 * Returns 1 when the data holds at least one complete multi-byte UTF-8
 * sequence, no malformed sequence, and no 7-bit character that
 * text_chars[] does not class as plain text.  Returns 0 otherwise; pure
 * 7-bit input is deliberately not claimed as UTF-8.  A sequence cut short
 * by the end of the data is ignored rather than treated as malformed.
 *
 * The stream must be seekable; 0 is returned if its position or size
 * cannot be determined or the buffer cannot be allocated.  The file
 * position is put back to where it was on entry.
 */
static int
looks_utf8(FILE *fp)
{
	long start, end;
	size_t nbytes, i;
	unsigned char *buf;
	int gotone = 0;
	int result = 0;

	/* remember the current position and measure what follows it */
	start = ftell(fp);
	if (start < 0)
		return 0;
	if (fseek(fp, 0L, SEEK_END) != 0)
		return 0;
	end = ftell(fp);
	if (fseek(fp, start, SEEK_SET) != 0 || end < start)
		return 0;

	/* +1 keeps the request non-zero for an empty remainder */
	buf = malloc((size_t)(end - start) + 1);
	if (buf == NULL)
		return 0;

	/* only the bytes actually read are examined */
	nbytes = fread(buf, 1, (size_t)(end - start), fp);

	/* leave the stream where the caller had it */
	(void) fseek(fp, start, SEEK_SET);

	for (i = 0; i < nbytes; i++) {
		unsigned char lead = buf[i];
		int following;

		if ((lead & 0x80) == 0) {	   /* 0xxxxxxx is plain ASCII */
			/*
			 * Even if the whole file is valid UTF-8 sequences,
			 * still reject it if it uses weird control characters.
			 */
			if (text_chars[lead] != T)
				goto done;
			continue;
		}

		if ((lead & 0x40) == 0)		   /* 10xxxxxx never 1st byte */
			goto done;

		/* 11xxxxxx begins UTF-8: count the continuation bytes */
		if ((lead & 0x20) == 0)			/* 110xxxxx */
			following = 1;
		else if ((lead & 0x10) == 0)		/* 1110xxxx */
			following = 2;
		else if ((lead & 0x08) == 0)		/* 11110xxx */
			following = 3;
		else if ((lead & 0x04) == 0)		/* 111110xx */
			following = 4;
		else if ((lead & 0x02) == 0)		/* 1111110x */
			following = 5;
		else
			goto done;

		while (following-- > 0) {
			i++;
			if (i >= nbytes) {
				/* data ends inside a sequence */
				result = gotone;
				goto done;
			}
			/* every continuation byte must be 10xxxxxx */
			if ((buf[i] & 0xc0) != 0x80)
				goto done;
		}

		gotone = 1;
	}

	result = gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
done:
	free(buf);

	return result;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].