Plan 9 from Bell Labs’s /usr/web/sources/contrib/gabidiaz/root/sys/src/cmd/perl/Porting/checkURL.pl

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#!/usr/bin/perl

use strict;
use warnings 'all';

use LWP::Simple qw /$ua getstore/;

my %urls;

my @dummy = qw(
	   http://something.here
	   http://www.pvhp.com
	      );
my %dummy;

@dummy{@dummy} = ();

foreach my $file (<*/*.pod */*/*.pod */*/*/*.pod README README.* INSTALL>) {
    open my $fh => $file or die "Failed to open $file: $!\n";
    while (<$fh>) {
        if (m{(?:http|ftp)://(?:(?!\w<)[-\w~?@=.])+} && !exists $dummy{$&}) {
            my $url = $&;
            $url =~ s/\.$//;
            $urls {$url} ||= { };
            $urls {$url} {$file} = 1;
        }
    }
    close $fh;
}

sub fisher_yates_shuffle {
    my $deck = shift;  # $deck is a reference to an array
    my $i = @$deck;
    while (--$i) {
	my $j = int rand ($i+1);
	@$deck[$i,$j] = @$deck[$j,$i];
    }
}

my @urls = keys %urls;

fisher_yates_shuffle(\@urls);

sub todo {
    warn "(", scalar @urls, " URLs)\n";
}

my $MAXPROC = 40;
my $MAXURL  = 10;
my $MAXFORK = $MAXPROC < $MAXURL ? 1 : $MAXPROC / $MAXURL;

select(STDERR); $| = 1;
select(STDOUT); $| = 1;

while (@urls) {
    my @list;
    my $pid;
    my $i;

    todo();

    for ($i = 0; $i < $MAXFORK; $i++) {
	$list[$i] = [ splice @urls, 0, $MAXURL ];
	$pid = fork;
	die "Failed to fork: $!\n" unless defined $pid;
	last unless $pid; # Child.
    }

    if ($pid) {
        # Parent.
	warn "(waiting)\n";
	1 until -1 == wait; # Reap.
    } else {
        # Child.
        foreach my $url (@{$list[$i]}) {
            my $code = getstore $url, "/dev/null";
            next if $code == 200;
            my $f = join ", " => keys %{$urls {$url}};
            printf "%03d  %s: %s\n" => $code, $url, $f;
        }

        exit;
    }
}

__END__

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].