#!/usr/local/bin/perl
##---------------------------------------------------------------------------##
##  File:
##	$Id: web-archive,v 1.31 2002/09/03 16:30:47 ehood Exp $
##  Description:
##	Updates/creates web archives from mailbox archives.
##	Run script with '-man' option to view manpage for this program.
##---------------------------------------------------------------------------##
##  Copyright (C) 2001-2002	Earl Hood <earl@earlhood.com>
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##  
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##  
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##  02111-1307, USA
##---------------------------------------------------------------------------##

package MHArc::web_archive;

# Directory containing this script; used to locate the relative lib
# directory and config.sh.
my $Dir;
BEGIN {
  # Compute the directory with File::Basename instead of running the
  # dirname(1) command through the shell: the shell form breaks (or runs
  # unintended commands) when $0 contains spaces or shell metacharacters.
  require File::Basename;
  $Dir = File::Basename::dirname($0);
}
use lib "$Dir/../lib";  # Add relative lib to search path

use MHArc::Config;
my $config = MHArc::Config->load("$Dir/../lib/config.sh");


use Getopt::Long;
use MHArc::ListDef;
use MHArc::Util qw( usage );

# Load MHonArc library
require 'mhamain.pl';

# Verbosity flag.  MAIN sets it from the -debug/-verbose option or the
# WA_DEBUG environment variable; the helper subs below read it to decide
# whether to print progress messages.
my $debug = 0;

# Regular expression to match mail folder/mboxes
# (kept as a string and interpolated into patterns): a run of digits,
# optionally followed by "-" and a second run of digits.
my $folder_regex = '\d+(?:-\d+)?';

##  Main program.
##
##  Each setting is resolved from the command-line option first, then the
##  environment variable and/or config.sh value (where one exists), and
##  finally a built-in default.  The selected list archives are then
##  converted with MHonArc, their Namazu search indexes are refreshed,
##  the per-list period index page is regenerated, and finally the
##  all-lists index is rewritten.
MAIN: {
  my %opt = ( );
  my $clstatus = GetOptions(\%opt,
    'alllistsurl=s',    # Root to all lists URL.
    'alllistsindex=s',  # Pathname to all lists index page.
    'debug|verbose',    # Show what is going on in detail.
    'editidx',          # Edit archive pages; useful to apply MHonArc
                        # resource changes.
    'editallidx',       # Regen all lists index.
    'editidxonly',      # Edit archive index pages only.
    'editrootidx',      # Regen top index.
    'extractcgi=s',     # Extract message CGI URL.
    'home=s',           # Pathname of home directory of archive account.
    'htmldir=s',        # Root directory for html archives.
    'htmlurl=s',        # Root URL for html archives.
    'infodir=s',        # Directory of per-list information pages.
    'infourl=s',        # Base URL of per-list information pages.
    'listsdef=s',       # Pathname to list definition file.
    'mboxdir=s',        # Root directory for mbox archives.
    'mboxurl=s',        # Root URL for mbox archives.
    'mharc=s',          # MHonArc resource file for archives.
    'mharcdir=s',       # Directory of list-specific MHonArc resource files.
    'mhamaxsize=i',     # Maximum MHonArc archive size.
    'mhapagesize=i',    # Maximum MHonArc index page size.
    'mknmz=s',          # Pathname to Namazu make search index program.
    'mknmzrc=s',        # Pathname to Namazu configuration file.
    'mknmztmpldir=s',   # Pathname to Namazu template directory.
    'mesgcgi=s',        # Message CGI URL.
    'mnavcgi=s',        # Month navigation CGI URL.
    'mtimeage=i',       # Modify time age of a mailbox file to be considered
                        # for processing.
    'nosearch',         # Do not update search indexes.
    'rebuild',          # Rebuild archives from scratch.
    'rooturl=s',        # Root URL to archives.
    'searchcgi=s',      # Search CGI URL.

    'man',
    'help'
  );
  usage(0) unless $clstatus;
  usage(1) if $opt{'help'};
  usage(2) if $opt{'man'};

  my $HOME              = $opt{'home'} ||
                                $config->{'SW_ROOT'} ||
                                "$Dir/..";
  my $ROOT_URL          = $opt{'rooturl'} ||
                                $config->{'ROOT_URL'} ||
                                "/~mhonarc/archives";
  my $LISTS_DEF_FILE    = $opt{'listsdef'} ||
                                $config->{'LISTS_DEF_FILE'} ||
                                "$HOME/lib/lists.def";
  my $HTML_DIR          = $opt{'htmldir'} ||
                                $config->{'HTML_DIR'} ||
                                "$HOME/html";
  my $HTML_URL          = $opt{'htmlurl'} ||
                                $config->{'HTML_URL'} ||
                                "$ROOT_URL/html";
  my $MBOX_DIR          = $opt{'mboxdir'} ||
                                $config->{'MBOX_DIR'} ||
                                "$HOME/mbox";
  my $MBOX_URL          = $opt{'mboxurl'} ||
                                $config->{'MBOX_URL'} ||
                                "$ROOT_URL/mbox";
  my $INFO_DIR          = $opt{'infodir'} ||
                                $config->{'INFO_DIR'} ||
                                "$HOME/info";
  my $INFO_URL          = $opt{'infourl'} ||
                                $config->{'INFO_URL'} ||
                                "$ROOT_URL/info";
  my $MHA_RC            = $opt{'mharc'} ||
                                $config->{'MHA_RC'} ||
                                "$HOME/lib/common.mrc";
  my $MHA_RC_DIR        = $opt{'mharcdir'} ||
                                $config->{'MHA_RC_DIR'} ||
                                "$HOME/lib/mrc";
  # The two size limits are resolved but currently not passed to MHonArc
  # (see the commented-out -maxsize/-idxsize arguments below).
  my $MHA_MAXSIZE       = $opt{'mhamaxsize'} ||
                                $ENV{'WA_MAXSIZE'} ||
                                2000;
  my $MHA_PAGESIZE      = $opt{'mhapagesize'} ||
                                $ENV{'WA_PAGESIZE'} ||
                                200;
  my $MTIME_AGE         = $opt{'mtimeage'} ||
                                $ENV{'WA_MTIME_AGE'} ||
                                $config->{'MTIME_AGE'} ||
                                86400;
  my $MKNMZ             = $opt{'mknmz'} ||
                                $config->{'MKNMZ'} ||
                                '/usr/local/bin/mknmz';
  my $MKNMZRC           = $opt{'mknmzrc'} ||
                                $config->{'MKNMZ_RC'} ||
                                "$HOME/cgi-bin/mknmzrc";
  my $MKNMZTMPLDIR      = $opt{'mknmztmpldir'} ||
                                $config->{'MKNMZ_TMPL_DIR'} ||
                                "$HOME/cgi-bin/template";
  my $ALL_LISTS_URL     = $opt{'alllistsurl'} ||
                                $config->{'ALL_LISTS_URL'} ||
                                $HTML_URL;
  my $MESG_CGI          = $opt{'mesgcgi'} ||
                                $config->{'MESG_CGI'} ||
                                join('/', $ROOT_URL,'cgi-bin/mesg.cgi');
  my $MNAV_CGI          = $opt{'mnavcgi'} ||
                                $config->{'MNAV_CGI'} ||
                                join('/', $ROOT_URL,'cgi-bin/mnav.cgi');
  my $SEARCH_CGI        = $opt{'searchcgi'} ||
                                $config->{'SEARCH_CGI'} ||
                                join('/', $ROOT_URL,'cgi-bin/namazu.cgi');
  my $EXTRACT_CGI       = $opt{'extractcgi'} ||
                                $config->{'EXTRACT_CGI'} ||
                                join('/', $ROOT_URL,'cgi-bin/extract-mesg.cgi');

  my $rebuild           = $opt{'rebuild'} ||
                                $ENV{'WA_REBUILD'} || 0;
  my $editidx           = $opt{'editidx'} ||
                                $ENV{'WA_EDIT'} || 0;
  my $editidxonly       = $opt{'editidxonly'} || 0;
  my $editrootidx       = $opt{'editrootidx'};
  my $editallidx        = $opt{'editallidx'};
  my $nosearch          = $opt{'nosearch'} ||
                                $ENV{'WA_NOSEARCH'} || 0;
     $debug             = $opt{'debug'} ||
                                $ENV{'WA_DEBUG'};

  my $all_index         = $opt{'alllistsindex'} ||
                              $config->{'ALL_LISTS_INDEX'} ||
                              join('/', $HTML_DIR, 'lists.html');
  my $main_header       = $config->{'MAIN_HEADER'} ||
                              join('/', $HTML_DIR, '.PNM.head');
  my $main_footer       = $config->{'MAIN_FOOTER'} ||
                              join('/', $HTML_DIR, '.PNM.foot');

  # Mode precedence: -rebuild overrides every edit mode, and -editidx
  # (implied by -editidxonly) overrides the index-only modes.
  my $time = time;
  if ($rebuild) {
    $editidx = 0;
    $editrootidx = 0;
    $editallidx = 0;
  }
  $editidx = 1  if $editidxonly;
  if ($editidx) {
    $editrootidx = 0;
    $editallidx = 0;
  }

  my $listdef = MHArc::ListDef->new($LISTS_DEF_FILE);
  print "Loaded lists definitions.\n"  if $debug;

  my @all_index_args = (
    '-config'   => $config,
    '-listdef'  => $listdef,
    '-htmldir'  => $HTML_DIR,
    '-htmlurl'  => $HTML_URL,
    '-infodir'  => $INFO_DIR,
    '-infourl'  => $INFO_URL,
    '-allindex' => $all_index
  );

  if ($editallidx) {
    update_archive_index(@all_index_args);
    last MAIN;
  }

  mhonarc::initialize();
  print "MHonArc initialized.\n"  if $debug;

  print "Reading $MBOX_DIR.\n"  if $debug;
  my $mbox_dh;
  opendir($mbox_dh, $MBOX_DIR) || die qq/Unable to open "$MBOX_DIR": $!/;

  # Get list of archives to process
  my @dirs = ();
  if (@ARGV) {
    @dirs = @ARGV;
  } else {
    @dirs = grep { (-d "$MBOX_DIR/$_") &&
                    ($_ ne '.') &&
                    ($_ ne '..')
                  } readdir($mbox_dh);
  }
  # Always release the handle, also when the lists came from @ARGV.
  closedir($mbox_dh);

  print "Lists: ", join(', ', @dirs), "\n"  if $debug;
  LIST: foreach my $list (@dirs) {
    print "Processing $list ...\n"  if $debug;

    # A rebuild runs "rm -r" on the list's html directory, so never accept
    # a name that resolves to the html root or its parent.
    if ($list eq '' || $list eq '.' || $list eq '..') {
      warn qq/Warning: Skipping invalid list name "$list"\n/;
      next LIST;
    }

    my @folders = ();
    if (!$editidx && !$editrootidx) {
      # Get list of input mailboxes to process

      my $dir = join('/', $MBOX_DIR, $list);
      my $dh;
      if (!opendir($dh, $dir)) {
        warn qq/Unable to open "$dir": $!/;
        next LIST;
      }
      my @mboxes = grep { /^$folder_regex(?:\.gz)?$/o } readdir($dh);
      closedir($dh);
      print "Mboxes: ", join(', ', @mboxes), "\n"  if $debug;

      foreach my $mbox (@mboxes) {
        my $mboxpath = join('/', $dir, $mbox);
        if ($rebuild) {
          push(@folders, $mboxpath);
          next;
        }
        my $mtime = (stat($mboxpath))[9];
        if (!defined($mtime)) {
          warn qq/Warning: Unable to stat "$mboxpath": $!\n/;
          next;
        }
        print "$mboxpath mtime: $mtime\n"  if $debug;
        # Only mailboxes modified recently enough are processed.
        if (($time - $mtime) < $MTIME_AGE) {
          push(@folders, $mboxpath);
        }
      }

      next LIST  if (!@folders);
      print "Folders: ", join(', ', @folders), "\n"  if $debug;

    } elsif ($editidx) {
      # Just editing pages so we get folder list from html directory
      my $dir = join('/', $HTML_DIR, $list);
      my $dh;
      if (!opendir($dh, $dir)) {
        warn qq/Unable to open "$dir": $!/;
        next LIST;
      }
      my @periods = grep { /^$folder_regex$/o } readdir($dh);
      closedir($dh);

      @folders = map { join('/', $dir, $_) } @periods;
      next LIST  if (!@folders);
      print "Editidx Folders: ", join(', ', @folders), "\n"  if $debug;
    }
    @folders = reverse sort @folders;

    my $htmldir = join('/', $HTML_DIR, $list);
    if ($rebuild) {
      print "Removing $htmldir\n"  if $debug;
      system('/bin/rm', '-r', $htmldir);
    }
    mkdir($htmldir, 0777);

    # A ".CVS" suffix marks a CVS commit archive of the base list.
    my $listname = $list;
    my $cvs = ($listname =~ s/\.CVS$//);

    my $disable_search = ($list =~ /^\./) ||
                         ((defined($listdef->{$listname}{'no-search'}) &&
                           $listdef->{$listname}{'no-search'}[0]));

    my $title;
    if (defined($listdef->{$listname}{'description'})) {
      $title = join(' ', @{$listdef->{$listname}{'description'}});
    } else {
      $title = $listname;
    }
    my $short_title = $listname;
    if ($cvs) {
      $title = '[CVS] '.$title;
      $short_title = '[CVS] '.$short_title;
    }

    if (!$editrootidx) {
      my @mhaargs = (
        '-modtime',
        '-lockmethod', 'flock',
        #'-maxsize', $MHA_MAXSIZE,
        #'-idxsize', $MHA_PAGESIZE,
        '-rcfile', $MHA_RC,
        #'-outdir' , $htmldir,
        '-title', "$title (date)",
        '-ttitle', "$title (thread)",
        '-definevar', "LIST-TITLE='$short_title'",
        '-definevar', "LIST-NAME='$list'",
        '-definevar', "SEARCH-CGI=$SEARCH_CGI",
        '-definevar', "PNAV-CGI=$MNAV_CGI",
        '-definevar', "EXTRACT-CGI=$EXTRACT_CGI",
        '-definevar', "MESG-CGI=$MESG_CGI",
        '-definevar', "ALL-LISTS-URL=$ALL_LISTS_URL",

        '-definevar', "MNAV-CGI=$MNAV_CGI", # backwards compatibility
      );

      if (-e "$MHA_RC_DIR/$list.mrc") {
        push(@mhaargs, '-rcfile', "$MHA_RC_DIR/$list.mrc");
      }
      if ($cvs) {
        push(@mhaargs, '-nothread');
        push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
      } else {
        push(@mhaargs, '-thread');
      }
      if ($list =~ /^\./) {
        push(@mhaargs,
             '-nothread',
             '-definevar', "SEARCH-FORM=''");
        push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
      }
      if ($editidx) {
        push(@mhaargs, '-editidx');
        push(@mhaargs, '-nomsgpgs')  if $editidxonly;
      }
      # Pass the same config key that is tested for definedness.
      if (defined($config->{'MSG_DATE_FIELDS'})) {
        push(@mhaargs, '-datefields', $config->{'MSG_DATE_FIELDS'});
      }
      if (!$debug && !$rebuild) {
        push(@mhaargs, '-quiet');
      }
      if (!$rebuild && !$editidx) {
        push(@mhaargs, '-add');
      }

      # add any custom options specified in definition file
      if (defined($listdef->{$listname}{'mhonarc-options'})) {
        # Text::ParseWords is the maintained core replacement for the
        # obsolete shellwords.pl library and is callable from any package.
        require Text::ParseWords;
        push(@mhaargs,
             Text::ParseWords::shellwords(
               join(' ', @{$listdef->{$listname}{'mhonarc-options'}})));
      }

      # if searching is disabled, zero-out $SEARCH-FORM$
      if ($disable_search) {
        push(@mhaargs, '-definevar', "SEARCH-FORM=''");
      }

      FOLDER: foreach my $folder (@folders) {
        # Period name: basename of the folder without any ".gz" suffix
        my $period = $folder;
        $period =~ s/\.gz$//;
        $period =~ s/^.*\///;
        my $perioddir = join('/', $htmldir, $period);

        # make sure directory exists
        mkdir($perioddir, 0777);

        # set final arguments to mhonarc
        my @fmhaargs = (
          @mhaargs,
          '-outdir', $perioddir,
          '-definevar', "CUR-PERIOD='$period'",

          '-definevar', "CUR-MONTH='$period'", # backwards compatibility
        );
        push(@fmhaargs, $folder)  unless $editidx;

        # call mhonarc
        print "Processing archive $perioddir...\n"  if $debug;
        print "\tmhonarc options: ", join(' ', @fmhaargs), "\n"  if $debug;
        if (!mhonarc::open_archive(@fmhaargs)) {
          warn qq/Warning: Unable to open "$perioddir" archive: /,
               qq/($mhonarc::CODE) $mhonarc::ERROR\n/;
          next FOLDER;
        }
        my $cur_msg_cnt = $mhonarc::NumOfMsgs || 0;
        mhonarc::process_input();
        if ($mhonarc::CODE != 0) {
          warn qq/Warning: Problem processing "$perioddir": /,
               qq/($mhonarc::CODE) $mhonarc::ERROR\n/;
          next FOLDER;
        }
        if ($cur_msg_cnt == $mhonarc::NumOfMsgs) {
          print "Skipping search index, no new messages in archive\n"
              if $debug;
          next FOLDER;
        }

        # update search index
        # The -Y option is used so we do not have to process all months
        # to update index.
        if (!$nosearch && !$disable_search) {
          my @nmzargs = (
                 $MKNMZ,
                 '--mhonarc',           # only do mhonarc pages
                 '-f', $MKNMZRC,        # specify resource file
                 '-T', $MKNMZTMPLDIR,   # specify template directory
                 '-O', $htmldir,        # specify location to place index
                 '-Y'                   # do not delete existing files
          );
          if (!$debug && !$rebuild) {
            push(@nmzargs, '--quiet');
          }
          push(@nmzargs, $perioddir);
          print "Search Index Command: ", join(" ", @nmzargs), "\n"  if $debug;

          if (system(@nmzargs)) {
            warn qq/Warning: Non-zero exit status returned from /,
                 qq/"@nmzargs": $?\n/;
          }
          namazu_cleanup($htmldir);
        }
      }
    }

    ## Update monthly index
    my $html_dh;
    if (!opendir($html_dh, $htmldir)) {
      warn qq/Warning: Unable to open $htmldir for reading: $!\n/;
      next LIST;
    }
    # Anchored at both ends, like every other use of $folder_regex, so
    # stray entries that merely start with digits are not listed.
    my @months = reverse sort grep { /^$folder_regex$/o } readdir($html_dh);
    closedir($html_dh);
    print "Month listing for main index: @months\n"  if $debug;

    # The page is written to a temporary file and renamed into place so
    # readers never see a partially written index.
    my $indexhtml = join('/', $htmldir, 'index.html');
    my $index_fh;
    if (!open($index_fh, '>', "$indexhtml.tmp")) {
      warn qq/Warning: Unable to create "$indexhtml.tmp": $!\n/;
      next LIST;
    }

    my @vars = (
      '-nosearch'  => $disable_search,
      'SEARCH-CGI' => $SEARCH_CGI,
      'LIST-TITLE' => $short_title,
      'LIST-NAME'  => $list,
      'LIST-DESC'  => $title,
    );
    print_template($index_fh, $main_header, @vars);
    print {$index_fh} "<ul>\n";
    foreach my $mon (@months) {
      print {$index_fh} qq|<li><b>$mon</b>:|;
      print {$index_fh} qq|&nbsp;&nbsp;<a href="$mon/index.html">[Date]</a>|
          if (-e join('/', $htmldir, $mon, 'index.html'));
      print {$index_fh} qq|&nbsp;&nbsp;<a href="$mon/threads.html">[Thread]</a>|
          if (-e join('/', $htmldir, $mon, 'threads.html'));

      if (!$listdef->{$listname}{'no-raw-link'}[0]) {
        my $compressed = 0;
        my $mbox_file = join('/', $MBOX_DIR, $list, $mon);
        my $mbox_url  = join('/', $MBOX_URL, $list, $mon);
        if (! -e $mbox_file) {
          # fall back to the gzipped mailbox
          $mbox_file .= ".gz";
          $mbox_url  .= ".gz";
          $compressed = 1;
        }
        if (-e $mbox_file) {
          my $size = (-s $mbox_file) || 0;
          print {$index_fh} qq|&nbsp;&nbsp;<a href="$mbox_url">[mbox: |,
                            $size, qq| bytes|;
          print {$index_fh} qq|, gzipped|  if $compressed;
          print {$index_fh} qq|]</a>|;
        }
      }
      print {$index_fh} qq|</li>\n|;
    }
    print {$index_fh} "</ul>\n";
    print_template($index_fh, $main_footer, @vars);
    if (!close($index_fh)) {
      # Buffered write errors surface at close; keep the old index then.
      warn qq/Warning: Problem writing "$indexhtml.tmp": $!\n/;
      unlink("$indexhtml.tmp");
      next LIST;
    }
    if (!rename("$indexhtml.tmp", $indexhtml)) {
      warn qq|Warning: Unable to rename "$indexhtml.tmp" to |,
           qq|"$indexhtml": $!\n|;
    }
  }

  update_archive_index(@all_index_args);

} # End: MAIN

############################################################################

##  Escape the HTML special characters "&", "<", and ">" in a string.
#   Every occurrence is replaced (the ampersand first, so the entities
#   produced for "<" and ">" are not escaped a second time).
#   Returns the escaped copy; the argument is not modified.
#
sub entify {
  my $str = shift;
  $str =~ s/\&/\&amp;/g;
  $str =~ s/</\&lt;/g;
  $str =~ s/>/\&gt;/g;
  $str;
}

##  Return the period directory names found in a list archive directory,
#   sorted in descending string order (newest period first).  An empty
#   list is returned, after a warning, if the directory cannot be read.
#
sub get_periods {
  my ($path) = @_;

  local(*DIR);
  unless (opendir(DIR, $path)) {
    warn qq/Warning: Unable to open "$path": $!/;
    return ( );
  }
  my @entries = readdir(DIR);
  closedir(DIR);

  # keep only names that are entirely a period specification
  my @periods = grep { /^$folder_regex$/o } @entries;
  @periods = reverse sort @periods;
  @periods;
}

##  Read a template from a filehandle and expand its variables.
#
#   Arguments: the filehandle to read, followed by name/value pairs.
#   Each "$NAME$" in the template is replaced with the value given for
#   NAME; unknown names expand to the empty string.
#
#   If the pair '-nosearch' has a true value, every line from one
#   containing "<!--x-search-form-->" through one containing
#   "<!--/x-search-form-->" is dropped.  A section opened and closed on
#   the same line removes just that line.
#
#   Returns the expanded template text.
#
sub read_template {
  my $fh	= shift;
  my %varhash	= @_;
  my $data = "";

  my $expand = sub {
    defined($varhash{$_[0]}) ? $varhash{$_[0]} : '';
  };

  if ($varhash{'-nosearch'}) {
    local $_;
    my $ignore = 0;
    while (<$fh>) {
      if ($ignore) {
	$ignore = 0  if /<!--\/x-search-form-->/;
	next;
      }
      if (/<!--x-search-form-->/) {
	# Only keep skipping when the closing marker is not already on
	# this line; otherwise the rest of the template would be lost.
	$ignore = 1  unless /<!--x-search-form-->.*<!--\/x-search-form-->/;
	next;
      }
      s/\$([^\$]+)\$/$expand->($1)/ge;
      $data .= $_;
    }
  } else {
    # slurp the whole template in one read
    local $/;
    $data = <$fh>;
    $data = ""  unless defined($data);
    $data =~ s/\$([^\$]+)\$/$expand->($1)/ge;
  }
  $data;
}

##  Expand a template file and print the result to a filehandle.
#
#   Arguments: the output filehandle, the template pathname, and the
#   name/value pairs handed on to read_template().  Nothing is printed
#   if the file does not exist; a warning is issued if it exists but
#   cannot be opened.
#
sub print_template {
  my $fhout = shift;
  my $file = shift;
  if (-e $file) {
    print "Reading template file $file\n"  if $debug;
    # Three-argument open: the pathname is never interpreted as a mode
    # or a command, whatever characters it contains.
    my $fh;
    if (open($fh, '<', $file)) {
      print {$fhout} read_template($fh, @_);
      close($fh);
    } else {
      warn qq/Warning: Unable to open "$file": $!\n/;
    }
  }
}

##  Remove a stale Namazu lock file from an archive directory.
#
#   The file "NMZ.lock2" in the given directory is read for a process
#   id.  The lock is removed, with a warning, only when that process no
#   longer exists.  Nothing is done when there is no lock file, when it
#   holds no usable process id, or when the process is still alive.
#
sub namazu_cleanup {
  my $dir = shift;
  my $lock = join('/', $dir, 'NMZ.lock2');
  my $fh;
  if (!open($fh, '<', $lock)) {
    # no lock file left around, so everything should be okay
    return;
  }
  my $pid = <$fh>;
  close($fh);

  # Without a positive numeric pid the state of the lock is unknown
  # (kill with pid 0 would address our own process group); leave it.
  return  unless defined($pid) && $pid =~ /^\s*(\d+)\s*$/ && $1 > 0;
  $pid = $1;

  # Signal 0 only probes for existence.  EPERM means the process exists
  # but belongs to another user, so the lock is still in use.
  return  if kill(0, $pid) || $!{EPERM};

  warn qq/Warning: Stale "$lock", removing it\n/;
  if (!unlink($lock)) {
    warn qq/Warning: Unable to remove "$lock": $!\n/;
  }
}

##  Format an epoch time as "YYYY-MM-DD HH:MM:SS" in the local timezone.
#
sub format_date {
  my $time = shift;
  my @tm = localtime($time);
  # localtime yields a 0-based month and a year counted from 1900
  return sprintf("%d-%02d-%02d %02d:%02d:%02d",
		 $tm[5] + 1900, $tm[4] + 1, @tm[3, 2, 1, 0]);
}

##  Retrieve the last time an archive was updated.
#   We scan the message pages since their mtime should be set to the
#   date of the message.
#
#   Takes the pathname of an archive directory.  Returns the newest
#   modification time found among the "msgNNN.html" pages.  If there is
#   none, the mtime of the MHonArc database file (".mhonarc.db" or
#   "mhonarc.db") is used, and failing that the mtime of the directory
#   itself.  Returns undef if the directory cannot be opened.
#
sub retrieve_last_update {
  my $archive = shift;

  my $dh;
  if (!opendir($dh, $archive)) {
    warn qq/Warning: Unable to open "$archive" for reading: $!\n/;
    return undef;
  }
  my @pages = grep { /^msg\d+\.html/ } readdir($dh);
  # A directory handle must be released with closedir, not close.
  closedir($dh);

  my $latest = 0;
  foreach my $file (@pages) {
    my $mtime = (stat(join('/', $archive, $file)))[9];
    next  unless defined($mtime);	# page vanished or is unreadable
    $latest = $mtime  if ($mtime > $latest);
  }

  if ($latest == 0) {
    # No luck with message pages, so try database file
    warn qq/Warning: Unable to determine last update from message pages /,
	          qq/for "$archive"\n/;
    foreach my $db ('.mhonarc.db', 'mhonarc.db') {
      my $mtime = (stat(join('/', $archive, $db)))[9];
      if (defined($mtime)) {
	$latest = $mtime;
	last;
      }
    }
  }
  if ($latest == 0) {
    # No luck with data, so use directory mtime
    $latest = ((stat($archive))[9]);
  }

  $latest;
}

##  Build the URL of a list's information page.
#
#   Named arguments: '-name' (list name), '-dir' (directory holding the
#   information pages), '-baseurl' (URL that directory is served under).
#   Returns "<baseurl>/<name>.html" when "<dir>/<name>.html" exists,
#   undef otherwise.
#
sub get_info_url {
  my %opts = @_;
  my $page = $opts{'-name'} . '.html';
  return undef  unless -e join('/', $opts{'-dir'}, $page);
  return join('/', $opts{'-baseurl'}, $page);
}

##  Generates the all-lists index.
#
#   Named arguments:
#     -config    configuration hash (ALL_LISTS_HEADER/ALL_LISTS_FOOTER)
#     -listdef   list definitions; its keys are the list names
#     -htmldir   root directory of the html archives
#     -htmlurl   root URL of the html archives
#     -infodir   directory of per-list information pages
#     -infourl   base URL of per-list information pages
#     -allindex  pathname of the index page to write
#     -header    optional header template pathname
#     -footer    optional footer template pathname
#
#   Lists whose name starts with "." are skipped.  Each remaining list,
#   and its ".CVS" companion, gets one table row if it has an html
#   archive; rows are ordered most recently updated first.  The page is
#   written to "<allindex>.tmp" and renamed into place.
#
sub update_archive_index {
  print "Generating root archive index...\n"  if $debug;

  my %opts	  = @_;
  my $config      = $opts{'-config'};
  my $listdef     = $opts{'-listdef'};
  my $html_dir    = $opts{'-htmldir'};
  my $html_url    = $opts{'-htmlurl'};
  my $info_dir    = $opts{'-infodir'};
  my $info_url    = $opts{'-infourl'};
  my $index_html  = $opts{'-allindex'};

  my $header	  = $opts{'-header'} ||
			      $config->{'ALL_LISTS_HEADER'} ||
			      join('/', $html_dir, '.PNM.all-head');
  my $footer	  = $opts{'-footer'} ||
			      $config->{'ALL_LISTS_FOOTER'} ||
			      join('/', $html_dir, '.PNM.all-foot');

  my $tmp_index   = $index_html . ".tmp";
  my $idx;
  if (!open($idx, '>', $tmp_index)) {
    warn qq/Warning: Unable to create "$tmp_index": $!\n/;
    return;
  }

  # Collect, per archive, the data needed for its table row.
  my %updated = ( );
  foreach my $listname (keys %$listdef) {
    next  if $listname =~ /^\./;  # skip hidden archives

    foreach my $list ($listname, "$listname.CVS") {
      my $dir = join('/', $html_dir, $list);
      next  unless -e $dir;

      print "Computing last update for $list...\n"  if $debug;
      my @months = get_periods($dir);
      next  unless @months;
      my $latest = $months[0];
      my $last_updated = retrieve_last_update(join('/', $dir, $latest));
      if (!defined($last_updated)) {
	print "Unable to compute last update for $list.\n"  if $debug;
	next;
      }
      $updated{$list} = [ $last_updated, $list, $listname, $dir, $latest ];
    }
  }

  print_template($idx, $header);
  print {$idx}
	    qq|<table class="archiveLists" cellpadding="3" cellspacing="1">\n|,
	    qq|<tr class="listsHeaderRow" valign="baseline" align="left">\n|,
	    qq|<th>Name</th>|,
	    qq|<th>Current&nbsp;Index</th>|,
	    qq|<th>Last&nbsp;Updated</th>|,
	    qq|\n</tr>\n|;

  # Most recently updated first; equal times fall back to the name so
  # the generated page is stable from run to run.
  my @order = sort { $updated{$b}->[0] <=> $updated{$a}->[0]
		     || $a cmp $b } keys(%updated);

  foreach my $key (@order) {
    my($time, $list, $listname, $dir, $latest) = @{$updated{$key}};

    print "Printing listing for $list\n"  if $debug;
    my $last_updated = format_date($time);
    $last_updated =~ s/ /\&nbsp;/g;
    my $short_title = entify($listname);
    my $description = entify($listdef->{$listname}{'description'}[0] ||
			     $listname);
    if ($list =~ /\.CVS$/) {
      $short_title .= "&nbsp;(CVS)";
      $description .= " (CVS commits)";
    }
    my $info = get_info_url(
	'-name'    => $listname,
	'-dir'     => $info_dir,
	'-baseurl' => $info_url
    );

    print {$idx} qq|<tr valign="baseline">\n|;

    print {$idx} qq|<td>&nbsp;<span class="listName">|,
		 qq|<a href="$html_url/$list/">$short_title</a></span>&nbsp;|;
    print {$idx} qq|<a class="infoLink" href="$info">[info]</a>&nbsp;|
		  if defined($info);
    print {$idx} qq|</td>\n|;

    print {$idx} qq|<td>|;
    print {$idx} qq|&nbsp;<a href="$html_url/$list/$latest/index.html">[Date]</a>&nbsp;|
	if (-e join('/', $dir, $latest, 'index.html'));
    print {$idx} qq|&nbsp;<a href="$html_url/$list/$latest/threads.html">[Thread]</a>&nbsp;|
	if (-e join('/', $dir, $latest, 'threads.html'));
    print {$idx} qq|</td>\n|;

    print {$idx} qq|<td>&nbsp;<tt>|, $last_updated, qq|</tt>&nbsp;</td>\n|;

    # close the row opened above
    print {$idx} qq|</tr>\n|;
  }

  print {$idx} qq|</table>\n|;
  print_template($idx, $footer);
  if (!close($idx)) {
    # Buffered write errors surface at close; keep the previous index.
    warn qq/Warning: Problem writing "$tmp_index": $!\n/;
    unlink($tmp_index);
    return;
  }

  if (!rename($tmp_index, $index_html)) {
    warn qq/Warning: Unable to rename "$tmp_index" to "$index_html": $!\n/;
  }
}


############################################################################
__END__

=head1 NAME

web-archive - Update/create MHonArc archives from mailbox archives

=head1 SYNOPSIS

  web-archive
  web-archive [options]
  web-archive [options] [list-name ...]

=head1 DESCRIPTION

This program is part of the auto-archiving system that works in
conjunction with Procmail, Namazu, and a collection of shell and
Perl programs.  This program has the responsibility of processing
the mailbox archives created by the B<filter-spool> script to
update and/or create MHonArc archives.

This program is automatically called by the B<read-mail> script for
processing incoming mail within the mail spool if B<filter-spool>
returns with an okay status.  However, this program can be manually
invoked to rebuild archives, edit existing archives, or other
administrative tasks.  Since there may be a need to do selective archive
processing, any non-option argument is treated as the name of a mailing
list archive to process.

=head1 OPTIONS

=over

=item C<-alllistsindex> I<pathname>

Pathname of file to generate the all lists index.
If not specified, the value of the C<ALL_LISTS_INDEX> variable in
C<config.sh> is used, else it defaults to "C<I<-htmldir>/lists.html>".

=item C<-alllistsurl> I<url>

URL to page containing list of all mailing lists archived.
If not specified, the value of the C<ALL_LISTS_URL> variable in
C<config.sh> is used, else it defaults to C<-htmlurl>.

=item C<-editidx>

Edit archive pages, useful to apply MHonArc resource
changes.

=item C<-editrootidx>

Only regenerate root index pages for archives.  This is useful if
you make changes to the C<.PNM.head> or C<.PNM.foot> files that you
want immediately applied.

=item C<-help>

Print out usage information.

=item C<-home> I<pathname>

Root pathname of archiving software and data.
If not specified, the parent directory that contains this program
is used.

=item C<-htmldir> I<pathname>

Root directory for html archives.
If not specified, "C<I<-home>/html>" is used.

=item C<-htmlurl> I<url>

URL root to HTML archives.
If not specified, defaults to C<I<rooturl>/html>.

=item C<-infodir> I<pathname>

Pathname of directory containing informational pages for each list
archive.  Information for a list archive can be provided by creating a
file called "C<I<list-name>.html>".  Once created, a link to the file
(based on the value of the C<-infourl> option) will be generated in
the all-lists index to it.

If this option is not specified, the value of the C<INFO_DIR> variable
in C<config.sh> is used, else it defaults to "C<I<-home>/info>".

=item C<-infourl> I<url>

Base URL containing informational pages for each list archive.
If not specified, the value of the C<INFO_URL> variable in
C<config.sh> is used, else it defaults to "C<I<-rooturl>/info>".

=item C<-listsdef> I<pathname>

Pathname to mailing lists definition file.
If not specified, "C<I<-home>/lib/lists.def>" is used.

=item C<-man>

Print out entire manpage.

=item C<-mboxdir> I<pathname>

Root directory for mbox archives.
If not specified, "C<I<-home>/mbox>" is used.

=item C<-mharc> I<pathname>

MHonArc resource file for archives.
If not specified, "C<I<-home>/lib/common.mrc>" is used.

=item C<-mharcdir> I<pathname>

Directory containing list-specific MHonArc resource files.  A given
list archive can have additional resource settings by creating a
file called C<I<list-name>.mrc> within the directory specified by
C<-mharcdir>.

If C<-mharcdir> is not specified, "C<I<-home>/lib/mrc>" is used.

=item C<-mhamaxsize> I<number>

Maximum MHonArc archive size.
If not specified the value of the C<WA_MAXSIZE> environment variable is used.

=item C<-mhapagesize> I<number>

Maximum MHonArc index page size.
If not specified the value of the C<WA_PAGESIZE> environment variable is used.

=item C<-mknmz> I<pathname>

Pathname to Namazu make search index program.
If not specified, "C</usr/local/bin/mknmz>" is used.

=item C<-mknmzrc> I<pathname>

Pathname to Namazu configuration file.
If not specified, "C<I<-home>/cgi-bin/mknmzrc>" is used.

=item C<-mknmztmpldir> I<pathname>

Pathname to Namazu template directory.
If not specified, "C<I<-home>/cgi-bin/template>" is used.

=item C<-mnavcgi> I<url>

URL to monthly navigation cgi program.
If not specified, C<I<rooturl>/cgi-bin/mnav.cgi> is used.

=item C<-mtimeage> I<seconds>

Modify time age of a mailbox file to be considered for processing.  If
not specified the value of the C<WA_MTIME_AGE> environment variable is
used.

=item C<-nosearch>

Do not update search indexes.

=item C<-rebuild>

Rebuild archives from scratch.

=item C<-rooturl> I<url>

URL root of archives.
If not specified, C</~mhonarc/archives> is used.

=item C<-searchcgi> I<url>

URL to search cgi program.
If not specified, C<I<rooturl>/cgi-bin/namazu.cgi> is used.

=item C<-verbose>

Show what is going on in detail.

=back

=head1 ENVIRONMENT

Environment variable usage is deprecated.

The following environment variables are recognized:

=over

=item C<WA_DEBUG>

If set to a true value, detailed information of progress will be
printed to stdout.  Debugging can also be enabled by the
C<-debug> command-line option.

=item C<WA_EDIT>

If set to a true value, archives will be edited.  It is probably
better to use the C<-editidx> command-line option instead if archive
editing is desired.

=item C<WA_MAXSIZE>

Maximum MHonArc archive size.  The default value is 2000.  This setting
can be overridden by the C<-mhamaxsize> command-line option.

=item C<WA_MTIME_AGE>

The modification age, in seconds, for a mailbox to be considered
for processing.  The default value is C<86400> (one day).
This setting can be overridden by the C<-mtimeage> command-line option.

=item C<WA_NOSEARCH>

If set to a true value, the Namazu search indexes will not be updated
for archives processed.  Search index updates can also be
disabled by the C<-nosearch> command-line option.

=item C<WA_PAGESIZE>

MHonArc index page size.  The default value is 200.  This setting
can be overridden by the C<-mhapagesize> command-line option.

=item C<WA_REBUILD>

If set to a true value, archives will be rebuilt.  It is probably
better to use the C<-rebuild> command-line option instead if rebuilding
is desired.

=back

=head1 VERSION

$Id: web-archive,v 1.31 2002/09/03 16:30:47 ehood Exp $

=head1 AUTHOR

Earl Hood, mhonarc@mhonarc.org 

This program is part of the MHArc archiving system and comes with
ABSOLUTELY NO WARRANTY and may be copied only under the terms of
the GNU General Public License, which may be found in the MHArc
distribution.

=cut

