diff --git a/.gitignore b/.gitignore
index 6dee256..b8e3200 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,6 @@
 /web/emperl.js
 /web/emperl.wasm
 /web/emperl.data
-/web/jquery*.js
-/web/codemirror*
-/web/normalize.css
+/web/cache/
 /pages/
 /wiki/
diff --git a/cache.pl b/cache.pl
new file mode 100755
index 0000000..a2faad3
--- /dev/null
+++ b/cache.pl
@@ -0,0 +1,68 @@
+#!/usr/bin/env perl
+use warnings;
+use strict;
+use feature 'state';
+use FindBin;
+use HTTP::Tiny;
+use File::Find qw/find/;
+use File::Path qw/make_path/;
+use File::Spec::Functions qw/catdir catfile abs2rel rel2abs splitdir/;
+use File::Basename qw/fileparse/;
+use ExtUtils::MakeMaker qw/prompt/;
+require File::Spec::Unix;
+
+# A quick and dirty script for caching external resources locally
+
+my $CACHEDIR = catdir($FindBin::Bin,'web','cache');
+make_path $CACHEDIR;
+
+my @findin = @ARGV ? map {rel2abs($_)} @ARGV : catdir($FindBin::Bin,'web');
+
+my @htmlfiles;
+find({ follow=>1,
+       wanted=>sub {
+           -f && /\.html?\z/i && push @htmlfiles, $File::Find::name;
+       } },
+     @findin );
+
+# yes, I know, parsing HTML with regexes, boo, hiss ;-)
+# http://www.perlmonks.org/?node_id=1201438
+my $regex = qr{
+    < (?:script|link) [^>]+ (?:src|href) \s* = \s*
+    (?<q> ["'] )
+    (?= https?:// (?! localhost\b | 127\.0\.0\.1\b ) )
+    \K
+    (?: (?! \k{q} | > ) . )*
+    (?= \k{q} )
+  }msx;
+
+for my $fn (@htmlfiles) {
+    my $html = do { open my $fh, '<:encoding(UTF-8)', $fn or die "$fn: $!"; local $/; <$fh> };
+    my $count = $html =~ s/$regex/fetch_resource($fn,$&)/eg;
+    warn+($count||0)." replacements in $fn\n";
+    next unless $count;
+    next unless prompt("Overwrite $fn? [Yn]","y")=~/^\s*y/i;
+    open my $ofh, '>:encoding(UTF-8)', $fn or die "$fn: $!";
+    print $ofh $html;
+    close $ofh;
+}
+
+sub fetch_resource {
+    my ($file,$url) = @_;
+    state $http = HTTP::Tiny->new;
+    state %cached;
+    my ($cachefn) = $url =~ m{/([^/]+)\z} or die $url;
+    $cachefn = catfile($CACHEDIR, $cachefn);
+    print STDERR "$url: ";
+    if (not $cached{$url}) {
+        my $resp = $http->mirror($url, $cachefn);
+        die "$resp->{status} $resp->{reason}" unless $resp->{success};
+        warn "$resp->{status} $resp->{reason}\n";
+        $cached{$url} = $cachefn;
+    }
+    else { print STDERR "already fetched\n"; }
+    my (undef,$path) = fileparse($file);
+    my $newurl = File::Spec::Unix->catdir(splitdir( abs2rel($cachefn,$path) ));
+    use Data::Dump; dd $file, $url, $cachefn, $newurl;
+    return $newurl;
+}
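
Not part of the patch: a minimal, self-contained sketch of what cache.pl's URL-matching regex does, run against a hypothetical HTML fragment (the example.com and localhost URLs below are made up for illustration). It shows why the named quote capture and \K are used: only the URL text between the quotes is matched, and local URLs are skipped.

#!/usr/bin/env perl
# Sketch only, not part of cache.pl: demonstrates the URL-matching regex on a
# made-up HTML fragment.
use warnings;
use strict;

my $regex = qr{
    < (?:script|link) [^>]+ (?:src|href) \s* = \s*
    (?<q> ["'] )
    (?= https?:// (?! localhost\b | 127\.0\.0\.1\b ) )
    \K
    (?: (?! \k{q} | > ) . )*
    (?= \k{q} )
  }msx;

my $html = <<'HTML';
<script src="https://example.com/lib/jquery.min.js"></script>
<link rel="stylesheet" href="http://localhost:8000/dev.css">
HTML

# Only the external URL is rewritten; the localhost link fails the negative
# lookahead. \K keeps the opening quote out of the match and (?=\k{q}) stops
# before the closing quote, so only the URL text itself is replaced.
$html =~ s{$regex}{cache/jquery.min.js}g;
print $html;
# => <script src="cache/jquery.min.js"></script>
#    <link rel="stylesheet" href="http://localhost:8000/dev.css">

In cache.pl the replacement side instead calls fetch_resource(), which mirrors the file into web/cache/ via HTTP::Tiny->mirror and returns a path relative to the HTML file being rewritten.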