#!/usr/local/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      $Id: web-archive,v 1.19 2002/03/15 03:54:02 ehood Exp $
##  Description:
##      Updates/creates web archives from mailbox archives.
##      Run script with '-man' option to view manpage for this program.
##---------------------------------------------------------------------------##
##  Copyright (C) 2001-2002     Earl Hood
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##  02111-1307, USA
##---------------------------------------------------------------------------##

package MHArc::web_archive;

use strict;
use warnings;

use File::Basename ();

my $Dir;
BEGIN {
    # Directory containing this script.  Library and configuration paths
    # are resolved relative to it.  Computed in-process so that a script
    # path containing spaces or shell metacharacters is handled correctly.
    $Dir = File::Basename::dirname($0);
}
use lib "$Dir/../lib";  # Add relative lib to search path
use MHArc::Config;
my $config = MHArc::Config->load("$Dir/../lib/config.sh");

use Getopt::Long;
use MHArc::ListDef;

# Load MHonArc library
require 'mhamain.pl';

# Regular expression to match mail folder/mboxes
my $folder_regex = '\d+(?:-\d+)?';

# Debug flag; set in MAIN from -debug or WA_DEBUG, read by the subroutines.
my $debug = 0;

##---------------------------------------------------------------------------##
##  Main program: resolve settings (command-line option, then environment
##  and/or config.sh value, then built-in default), determine which lists
##  to process, run MHonArc over each selected month folder, update the
##  Namazu search index, and regenerate each list's top-level index.html.
##
MAIN: {
    my %opt = ( );
    my $clstatus = GetOptions(\%opt,
        'alllistsurl=s',  # Root to all lists URL.
        'debug',          # Show what is going on in detail.
        'editidx',        # Edit archive pages; useful to apply MHonArc
                          # resource changes.
        'editidxonly',    # Edit archive index pages only.
        'editrootidx',    # Regen top index.
        'home=s',         # Pathname of home directory of archive account.
        'htmldir=s',      # Root directory for html archives.
        'htmlurl=s',      # Root URL for html archives.
        'listsdef=s',     # Pathname to list definition file.
        'mboxdir=s',      # Root directory for mbox archives.
        'mboxurl=s',      # Root URL for mbox archives.
        'mharc=s',        # MHonArc resource file for archives.
        'mhamaxsize=i',   # Maximum MHonArc archive size.
        'mhapagesize=i',  # Maximum MHonArc index page size.
        'mknmz=s',        # Pathname to Namazu make search index program.
        'mknmzrc=s',      # Pathname to Namazu configuration file.
        'mknmztmpldir=s', # Pathname to Namazu template directory.
        'mnavcgi=s',      # Month navigation CGI URL.
        'mtimeage=i',     # Modify time age of a mailbox file to be considered
                          # for processing.
        'nosearch',       # Do not update search indexes.
        'rebuild',        # Rebuild archives from scratch.
        'rooturl=s',      # Root URL to archives.
        'searchcgi=s',    # Search CGI URL.

        'man',
        'help'
    );
    usage(0)  unless $clstatus;
    usage(1)  if $opt{'help'};
    usage(2)  if $opt{'man'};

    my $HOME = $opt{'home'} || "$Dir/..";
    my $ROOT_URL = $opt{'rooturl'} ||
                   $config->{'ROOT_URL'} ||
                   "/~mhonarc/archives";
    my $LISTS_DEF_FILE = $opt{'listsdef'} ||
                         $config->{'LISTS_DEF_FILE'} ||
                         "$HOME/lib/lists.def";
    my $HTML_DIR = $opt{'htmldir'} ||
                   $config->{'HTML_DIR'} ||
                   "$HOME/archive/html";
    my $HTML_URL = $opt{'htmlurl'} ||
                   $config->{'HTML_URL'} ||
                   "$ROOT_URL/html";
    my $MBOX_DIR = $opt{'mboxdir'} ||
                   $config->{'MBOX_DIR'} ||
                   "$HOME/archive/mbox";
    my $MBOX_URL = $opt{'mboxurl'} ||
                   $config->{'MBOX_URL'} ||
                   "$ROOT_URL/mbox";
    my $MHA_RC = $opt{'mharc'} ||
                 $config->{'MHA_RC'} ||
                 "$HOME/archive/common.mrc";
    # NOTE: the two size settings are resolved but the corresponding
    # MHonArc options (-maxsize, -idxsize) are commented out below.
    my $MHA_MAXSIZE = $opt{'mhamaxsize'} ||
                      $ENV{'WA_MAXSIZE'} ||
                      2000;
    my $MHA_PAGESIZE = $opt{'mhapagesize'} ||
                       $ENV{'WA_PAGESIZE'} ||
                       200;
    my $MTIME_AGE = $opt{'mtimeage'} ||
                    $ENV{'WA_MTIME_AGE'} ||
                    $config->{'MTIME_AGE'} ||
                    86400;
    my $MKNMZ = $opt{'mknmz'} ||
                $config->{'MKNMZ'} ||
                '/usr/local/bin/mknmz';
    my $MKNMZRC = $opt{'mknmzrc'} ||
                  $config->{'MKNMZ_RC'} ||
                  "$HOME/archive/cgi-bin/mknmzrc";
    my $MKNMZTMPLDIR = $opt{'mknmztmpldir'} ||
                       $config->{'MKNMZ_TMPL_DIR'} ||
                       "$HOME/archive/cgi-bin/template";
    my $ALL_LISTS_URL = $opt{'alllistsurl'} ||
                        $config->{'ALL_LISTS_URL'} ||
                        $HTML_URL;
    my $MNAV_CGI = $opt{'mnavcgi'} ||
                   $config->{'MNAV_CGI'} ||
                   join('/', $ROOT_URL, 'cgi-bin/mnav.cgi');
    my $SEARCH_CGI = $opt{'searchcgi'} ||
                     $config->{'SEARCH_CGI'} ||
                     join('/', $ROOT_URL, 'cgi-bin/namazu.cgi');

    my $rebuild     = $opt{'rebuild'}     || $ENV{'WA_REBUILD'}  || 0;
    my $editidx     = $opt{'editidx'}     || $ENV{'WA_EDIT'}     || 0;
    my $editidxonly = $opt{'editidxonly'} || 0;
    my $editrootidx = $opt{'editrootidx'} || 0;
    my $nosearch    = $opt{'nosearch'}    || $ENV{'WA_NOSEARCH'} || 0;
    $debug          = $opt{'debug'}       || $ENV{'WA_DEBUG'}    || 0;

    my $main_header = $config->{'MAIN_HEADER'} ||
                      join('/', $HTML_DIR, '.PNM.head');
    my $main_footer = $config->{'MAIN_FOOTER'} ||
                      join('/', $HTML_DIR, '.PNM.foot');

    my $time = time;

    # Mode precedence: a rebuild overrides both edit modes, and editing
    # pages overrides a root-index-only regeneration.
    if ($rebuild) {
        $editidx     = 0;
        $editrootidx = 0;
    }
    $editidx = 1  if $editidxonly;
    if ($editidx) {
        $editrootidx = 0;
    }

    if ($debug) {
        print "HTML_DIR=$HTML_DIR\n",
              "MBOX_DIR=$MBOX_DIR\n",
              "MHA_RC=$MHA_RC\n",
              #"MHA_MAXSIZE=$MHA_MAXSIZE\n",
              #"MHA_PAGESIZE=$MHA_PAGESIZE\n",
              "MKNMZ=$MKNMZ\n",
              "MKNMZRC=$MKNMZRC\n",
              "MKNMZTMPLDIR=$MKNMZTMPLDIR\n",
              "MTIME_AGE=$MTIME_AGE\n";
        print "rebuild=$rebuild\n",
              "editidx=$editidx\n",
              "editidxonly=$editidxonly\n",
              "nosearch=$nosearch\n",
              "time=$time\n";
    }

    mhonarc::initialize();
    print "MHonArc initialized.\n"  if $debug;

    my $listdef = MHArc::ListDef->new($LISTS_DEF_FILE);
    print "Loaded lists definitions.\n"  if $debug;

    local(*DIR, *INDEX);
    print "Reading $MBOX_DIR.\n"  if $debug;
    opendir(DIR, $MBOX_DIR) ||
        die qq/Unable to open "$MBOX_DIR": $!/;

    # Get list of archives to process: the names given on the command
    # line, or else every subdirectory of the mbox root.
    my @dirs = ();
    if (@ARGV) {
        @dirs = @ARGV;
    } else {
        @dirs = grep { (-d "$MBOX_DIR/$_") && ($_ ne '.') && ($_ ne '..') }
                     readdir(DIR);
    }
    # Always release the handle, including when lists came from @ARGV.
    closedir(DIR);

    print "Lists: ", join(', ', @dirs), "\n"  if $debug;

    foreach my $list (@dirs) {
        print "Processing $list ...\n"  if $debug;
        my @folders       = ();
        my @searchfolders = ();

        if (!$editidx && !$editrootidx) {
            # Get list of input mailboxes to process
            my $dir = join('/', $MBOX_DIR, $list);
            if (!opendir(DIR, $dir)) {
                warn qq/Unable to open "$dir": $!/;
                next;
            }
            my @mboxes = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
            closedir(DIR);
            print "Mboxes: ", join(', ', @mboxes), "\n"  if $debug;

            foreach my $mbox (@mboxes) {
                my $mboxpath = join('/', $dir, $mbox);
                if ($rebuild) {
                    push(@folders, $mboxpath);
                    next;
                }
                my $mtime = (stat($mboxpath))[9];
                if (!defined($mtime)) {
                    # File vanished or is unreadable; it cannot be
                    # considered recently modified, so skip it.
                    warn qq/Warning: Unable to stat "$mboxpath": $!\n/;
                    next;
                }
                print "$mboxpath mtime: $mtime\n"  if $debug;
                # Only mailboxes modified within the age window are processed.
                if (($time - $mtime) < $MTIME_AGE) {
                    push(@folders, $mboxpath);
                }
            }
            next  if (!@folders);
            print "Folders: ", join(', ', @folders), "\n"  if $debug;

        } elsif ($editidx) {
            # Just editing pages so we get folder list from html directory
            my $dir = join('/', $HTML_DIR, $list);
            if (!opendir(DIR, $dir)) {
                warn qq/Unable to open "$dir": $!/;
                next;
            }
            my @htmlmonths = grep { /^$folder_regex$/o } readdir(DIR);
            closedir(DIR);

            foreach my $mon (@htmlmonths) {
                push(@folders, join('/', $dir, $mon));
            }
            next  if (!@folders);
            print "Editidx Folders: ", join(', ', @folders), "\n"  if $debug;
        }
        # When only -editrootidx is in effect neither branch above runs:
        # @folders stays empty and only the root index below is rewritten.

        @folders = reverse sort @folders;

        my $htmldir = join('/', $HTML_DIR, $list);
        if ($rebuild) {
            print "Removing $htmldir\n"  if $debug;
            system('/bin/rm', '-r', $htmldir);
        }
        mkdir($htmldir, 0777);

        my $cvs = $list =~ /\.CVS/;
        #($title) = $list =~ /([^.]+)/;
        my $title;
        if (defined($listdef->{$list}{'description'})) {
            $title = join(' ', @{$listdef->{$list}{'description'}});
        } else {
            $title = $list;
        }
        if ($cvs) {
            $title = '[CVS] '.$title;
        }

        if (!$editrootidx) {
            my @mhaargs = (
                '-modtime',
                '-lockmethod', 'flock',
                #'-maxsize', $MHA_MAXSIZE,
                #'-idxsize', $MHA_PAGESIZE,
                '-rcfile', $MHA_RC,
                #'-outdir' , $htmldir,
                '-title', "$title (date)",
                '-ttitle', "$title (thread)",
                '-definevar', "LIST-NAME=$list",
                '-definevar', "SEARCH-CGI=$SEARCH_CGI",
                '-definevar', "MNAV-CGI=$MNAV_CGI",
                '-definevar', "ALL-LISTS-URL=$ALL_LISTS_URL",
            );
            if ($cvs) {
                push(@mhaargs, '-nothread');
                push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
            } else {
                push(@mhaargs, '-thread');
            }
            # Lists whose name starts with a dot get no thread index and
            # no search form, and are left out of search indexing below.
            if ($list =~ /^\./) {
                push(@mhaargs, '-nothread', '-definevar', "SEARCH-FORM=''");
                push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
            }
            if ($editidx) {
                push(@mhaargs, '-editidx');
                push(@mhaargs, '-nomsgpgs')  if $editidxonly;
            }
            if (!$debug && !$rebuild) {
                push(@mhaargs, '-quiet');
            }
            if (!$rebuild && !$editidx) {
                push(@mhaargs, '-add');
            }

            foreach my $folder (@folders) {
                # Month name is the folder's basename without any ".gz".
                (my $mon = $folder) =~ s/\.gz$//;
                $mon =~ s/^.*\///;
                my $mondir = join('/', $htmldir, $mon);

                # make sure directory exists
                mkdir($mondir, 0777);

                # set final arguments to mhonarc
                my @fmhaargs = (
                    @mhaargs,
                    '-outdir', $mondir,
                    '-definevar', "CUR-MONTH='$mon'",
                );
                push(@fmhaargs, $folder)  unless $editidx;

                # call mhonarc
                # XXX: Should exit status be checked?
                print "Processing archive $mondir...\n"  if $debug;
                print "\tmhonarc options: ", join(' ', @fmhaargs), "\n"
                    if $debug;
                mhonarc::process_input(@fmhaargs);

                # update search index
                # The -Y option is used so we do not have to process all
                # months to update index.
                if (!$nosearch && $list !~ /^\./) {
                    push(@searchfolders, $mondir);
                }
            }

            if (@searchfolders) {
                my @nmzargs = (
                    $MKNMZ,
                    '--mhonarc',         # only do mhonarc pages
                    '-f', $MKNMZRC,      # specify resource file
                    '-T', $MKNMZTMPLDIR, # specify template directory
                    '-O', $htmldir,      # specify location to place index
                    '-Y'                 # do not delete existing files
                );
                if (!$debug && !$rebuild) {
                    push(@nmzargs, '--quiet');
                }
                push(@nmzargs, @searchfolders);
                print "Search Index Command: ", join(" ", @nmzargs), "\n"
                    if $debug;
                # XXX: Should exit status be checked?  Even so, what is
                #      recovery options?  Any terminal output should be
                #      caught by some log.
                if (system(@nmzargs)) {
                    warn qq/Warning: Non-zero exit status returned from /,
                         qq/"@nmzargs": $?\n/;
                }
                namazu_cleanup($htmldir);
            }
        }

        ## Update monthly index
        if (!opendir(DIR, $htmldir)) {
            warn qq/Warning: Unable to open $htmldir for reading: $!\n/;
            next;
        }
        my @months = reverse sort grep { /^$folder_regex/o } readdir(DIR);
        print "Month listing for main index: @months\n"  if $debug;
        closedir(DIR);

        # The index is written to a temporary file and renamed into place
        # so readers never see a partially written index.html.
        my $indexhtml = join('/', $htmldir, 'index.html');
        my $indextmp  = "$indexhtml.tmp";
        if (!open(INDEX, '>', $indextmp)) {
            warn qq/Warning: Unable to create "$indextmp": $!\n/;
            next;
        }
        my @vars = (
            'SEARCH-CGI' => $SEARCH_CGI,
            'LIST-NAME'  => $list,
            'LIST-DESC'  => $title,
        );
        print_template_file(\*INDEX, $main_header, 'header', @vars);
        print INDEX "\n";
        print_template_file(\*INDEX, $main_footer, 'footer', @vars);

        # Buffered write errors surface at close; do not install a
        # truncated index if the write failed.
        if (!close(INDEX)) {
            warn qq/Warning: Error writing "$indextmp": $!\n/;
            unlink($indextmp);
            next;
        }
        if (!rename($indextmp, $indexhtml)) {
            warn qq|Warning: Unable to rename "$indextmp" to |,
                 qq|"$indexhtml": $!\n|;
        }
    }

} # End: MAIN

############################################################################

##  print_template_file($out, $file, $what, name => value, ...)
##      If $file exists, expand its template variables and print the
##      result to filehandle $out.  $what ("header" or "footer") is used
##      only in the debug message.  A missing file is silently skipped;
##      an unreadable one produces a warning.
##
sub print_template_file {
    my($out, $file, $what, @vars) = @_;
    return  unless -e $file;

    print "Reading archive home $what $file\n"  if $debug;
    local(*FILE);
    if (open(FILE, '<', $file)) {
        print $out read_template(\*FILE, @vars);
        close(FILE);
    } else {
        warn qq|Warning: Unable to open $file: $!\n|;
    }
}

##  read_template($fh, name => value, ...)
##      Slurp all of filehandle $fh and return its content with every
##      "$NAME$" sequence replaced by the value given for NAME.  A name
##      with no (defined) value is replaced by the empty string.
##
sub read_template {
    my $fh      = shift;
    my %varhash = @_;

    local $/;   # slurp mode
    my $data = <$fh>;
    $data = ''  unless defined($data);
    $data =~ s/\$([^\$]+)\$/defined($varhash{$1}) ? $varhash{$1} : ''/ge;
    $data;
}

##  namazu_cleanup($dir)
##      Remove a stale "NMZ.lock2" file from index directory $dir.  The
##      lock is treated as stale when kill(0, pid) fails for the process
##      id read from the file.  A lock file without a usable process id
##      is left in place.
##
sub namazu_cleanup {
    my $dir  = shift;
    my $lock = join('/', $dir, 'NMZ.lock2');

    local(*LOCK);
    if (!open(LOCK, '<', $lock)) {
        # no lock file left around, so everything should be okay
        return;
    }
    my $pid = <LOCK>;
    close(LOCK);

    $pid = ''  unless defined($pid);
    $pid =~ s/^\s+//;
    $pid =~ s/\s+$//;
    if ($pid !~ /^\d+$/ || $pid == 0) {
        # kill(0, 0) would probe our own process group and always
        # succeed, so there is nothing meaningful to test here.
        warn qq/Warning: "$lock" does not contain a process id, /,
             qq/leaving it\n/;
        return;
    }

    if (!kill(0, $pid)) {
        warn qq/Warning: Stale "$lock", removing it\n/;
        if (!unlink($lock)) {
            warn qq/Warning: Unable to remove "$lock": $!\n/;
        }
    }
}

##  usage($verbose)
##      Print usage information taken from this file's POD via
##      Pod::Usage::pod2usage.  For a non-zero $verbose the output is
##      piped through $PAGER (default "more") when stdout is a terminal.
##
sub usage {
    require Pod::Usage;
    my $verbose = shift;
    if ($verbose == 0) {
        Pod::Usage::pod2usage(-verbose => $verbose);
    } else {
        my $pager = $ENV{'PAGER'} || 'more';
        local(*PAGER);
        my $fh = (-t STDOUT && open(PAGER, '|-', $pager))
                     ? \*PAGER : \*STDOUT;
        Pod::Usage::pod2usage(-verbose => $verbose,
                              -output  => $fh);
    }
}

############################################################################
__END__

=head1 NAME

web-archive - Update/create MHonArc archives from mailbox archives

=head1 SYNOPSIS

  web-archive
  web-archive [options]
  web-archive [options] [list-name ...]

=head1 DESCRIPTION

This program is part of the auto-archiving system that works in
conjunction with Procmail, Namazu, and a collection of shell and Perl
programs.

This program has the responsibility of processing the mailbox archives
created by the B script to update and/or create MHonArc archives.

This program is automatically called by the B script for processing
incoming mail within the mail spool if B returns with an okay status.
However, this program can be manually invoked to rebuild archives,
edit existing archives, or other administrative tasks.

Since there may be need to do selective archive processing, any
non-option related argument is treated as mailing list archive
name to process.

=head1 OPTIONS

=over

=item C<-alllistsurl> I<url>

URL to page containing list of all mailing lists archived.
If not specified, defaults to value of C<-htmlurl>.

=item C<-debug>

Show what is going on in detail.

=item C<-editidx>

Edit archive pages, useful to apply MHonArc resource changes.

=item C<-editidxonly>

Edit archive index pages only.

=item C<-editrootidx>

Only regenerate root index pages for archives.  This is useful
if you make changes to the C<.PNM.head> or C<.PNM.foot> files
that you want immediately applied.

=item C<-help>

Print out usage information.

=item C<-home> I<pathname>

Root pathname of archiving software and data.  If not specified,
the parent directory that contains this program is used.

=item C<-htmldir> I<pathname>

Root directory for html archives.  If not specified,
"C<I<home>/archive/html>" is used.

=item C<-htmlurl> I<url>

URL root to HTML archives.  If not specified, defaults to
C<I<rooturl>/html>.
=item C<-listsdef> I<pathname>

Pathname to mailing lists definition file.  If not specified,
"C<I<home>/lib/lists.def>" is used.

=item C<-man>

Print out entire manpage.

=item C<-mboxdir> I<pathname>

Root directory for mbox archives.  If not specified,
"C<I<home>/archive/mbox>" is used.

=item C<-mharc> I<pathname>

MHonArc resource file for archives.  If not specified,
"C<I<home>/archive/common.mrc>" is used.

=item C<-mhamaxsize> I<number>

Maximum MHonArc archive size.  If not specified the value of the
C<WA_MAXSIZE> environment variable is used.

=item C<-mhapagesize> I<number>

Maximum MHonArc index page size.  If not specified the value of the
C<WA_PAGESIZE> environment variable is used.

=item C<-mknmz> I<pathname>

Pathname to Namazu make search index program.  If not specified,
"C</usr/local/bin/mknmz>" is used.

=item C<-mknmzrc> I<pathname>

Pathname to Namazu configuration file.  If not specified,
"C<I<home>/archive/cgi-bin/mknmzrc>" is used.

=item C<-mknmztmpldir> I<pathname>

Pathname to Namazu template directory.  If not specified,
"C<I<home>/archive/cgi-bin/template>" is used.

=item C<-mnavcgi> I<url>

URL to monthly navigation cgi program.  If not specified,
C<I<rooturl>/cgi-bin/mnav.cgi> is used.

=item C<-mtimeage> I<seconds>

Modify time age of a mailbox file to be considered for processing.
If not specified the value of the C<WA_MTIME_AGE> environment variable
is used.

=item C<-nosearch>

Do not update search indexes.

=item C<-rebuild>

Rebuild archives from scratch.

=item C<-rooturl> I<url>

URL root of archives.  If not specified, C</~mhonarc/archives> is used.

=item C<-searchcgi> I<url>

URL to search cgi program.  If not specified,
C<I<rooturl>/cgi-bin/namazu.cgi> is used.

=back

=head1 ENVIRONMENT

=over

=item C<WA_DEBUG>

If set to a true value, detailed information of progress will be
printed to stdout.  Debugging can also be enabled by the C<-debug>
command-line option.

=item C<WA_EDIT>

If set to a true value, archives will be edited.  It is probably
better to use the C<-editidx> command-line option instead if
archives editing is desired.

=item C<WA_MAXSIZE>

Maximum MHonArc archive size.  The default value is 2000.  This
setting can be overridden by the C<-mhamaxsize> command-line option.

=item C<WA_MTIME_AGE>

The modification age, in seconds, for a mailbox to be considered
for processing.  The default value is C<86400> (one day).  This
setting can be overridden by the C<-mtimeage> command-line option.

=item C<WA_NOSEARCH>

If set to a true value, the Namazu search indexes will not be updated
for archives processed.  Search index updates can also be disabled
by the C<-nosearch> command-line option.

=item C<WA_PAGESIZE>

MHonArc index page size.  The default value is 200.  This setting can
be overridden by the C<-mhapagesize> command-line option.

=item C<WA_REBUILD>

If set to a true value, archives will be rebuilt.  It is probably
better to use the C<-rebuild> command-line option instead if
rebuilding is desired.

=back

=head1 VERSION

$Id: web-archive,v 1.19 2002/03/15 03:54:02 ehood Exp $

=head1 AUTHOR

Earl Hood, earl@earlhood.com

This program is part of the MHArc archiving system and comes with
ABSOLUTELY NO WARRANTY and may be copied only under the terms of
the GNU General Public License, which may be found in the MHArc
distribution.

=cut