CVSROOT: /cvsroot/mhonarc
Module name: mharc
Branch:
Changes by: Earl Hood <earl(_at_)earlhood(_dot_)com> 03/11/26 15:02:15
Modified files:
bin : web-archive
Log message:
Added -period command-line option.
Patches:
Index: mharc/bin/web-archive
diff -u mharc/bin/web-archive:1.44 mharc/bin/web-archive:1.45
--- mharc/bin/web-archive:1.44 Sat Aug 9 13:56:05 2003
+++ mharc/bin/web-archive Wed Nov 26 15:02:14 2003
@@ -1,7 +1,7 @@
#!/usr/local/bin/perl
##---------------------------------------------------------------------------##
## File:
-## $Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
+## $Id: web-archive,v 1.45 2003/11/26 20:02:14 ehood Exp $
## Description:
## Updates/creates web archives from mailbox archives.
## Run script with '-man' option to view manpage for this program.
@@ -82,6 +82,7 @@
'mtimeage=i', # Modify time age of a mailbox file to be considered
# for processing.
'nosearch', # Do not update search indexes.
+ 'period=s@', # Periods to process
'rebuild', # Rebuild archives from scratch.
'rooturl=s', # Root URL to archives.
'searchcgi=s', # Search CGI URL.
@@ -235,7 +236,7 @@
closedir(DIR);
}
- my(@months, @folders);
+ my(@months, @folders, @opt_periods);
my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
$folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
$disable_search, $listname, $short_title);
@@ -248,15 +249,42 @@
$listname = $list;
$cvs = ($listname =~ s/\.CVS$//);
- if (!$editidx && !$editrootidx) {
- # Get list of input mailboxes to process
+ if ($opt{'period'}) {
+ # List of periods explicitly specified on command-line
+ foreach $mon (@{$opt{'period'}}) {
+ if ($mon !~ /^[\d\-]+$/) {
+ warn qq/Warning: "$mon" is not a valid period specification\n/;
+ next;
+ }
+ push(@opt_periods, $mon);
- $dir = join('/', $MBOX_DIR, $list);
- if (!opendir(DIR, $dir)) {
- warn qq/Unable to open "$dir": $!/;
- next;
+ if ($editidx) {
+ # if just editing pages, we check against HTML archive directories
+ $mondir = join('/', $HTML_DIR, $list, $mon);
+ if (-e $mondir) {
+ push(@folders, $mondir);
+ next;
+ }
+ } else {
+ # else, we check against raw mailbox files
+ $mondir = join('/', $MBOX_DIR, $list, $mon);
+ if (-e $mondir) {
+ push(@folders, $mondir);
+ next;
+ }
+ if (-e $mondir.'.gz') {
+ push(@folders, $mondir.'.gz');
+ next;
+ }
+ }
+ warn qq/Warning: "$mondir" does not exist\n/;
}
+ # if specified periods do not exist, skip to next archive
+ next if (!@folders);
+ }
+
+ if (!$editidx && !$editrootidx) {
# create .noraw file indicator if no-raw-link specified
my $no_raw_file = join('/', $dir, '.noraw');
my $no_raw_htaccess = join('/', $dir, '.htaccess');
@@ -285,20 +313,28 @@
}
}
- @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
- closedir(DIR);
- print "Mboxes: ", join(', ', @months), "\n" if $debug;
-
- foreach $mon (@months) {
- $mondir = join('/', $dir, $mon);
- if ($rebuild) {
- push(@folders, $mondir);
+ # Get list of input mailboxes to process if not specifically provided.
+ if (!@folders) {
+ $dir = join('/', $MBOX_DIR, $list);
+ if (!opendir(DIR, $dir)) {
+ warn qq/Unable to open "$dir": $!/;
next;
}
- $mtime = (stat($mondir))[9];
- print "$mondir mtime: $mtime\n" if $debug;
- if (($time - $mtime) < $MTIME_AGE) {
- push(@folders, $mondir);
+ @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
+ closedir(DIR);
+ print "Mboxes: ", join(', ', @months), "\n" if $debug;
+
+ foreach $mon (@months) {
+ $mondir = join('/', $dir, $mon);
+ if ($rebuild) {
+ push(@folders, $mondir);
+ next;
+ }
+ $mtime = (stat($mondir))[9];
+ print "$mondir mtime: $mtime\n" if $debug;
+ if (($time - $mtime) < $MTIME_AGE) {
+ push(@folders, $mondir);
+ }
}
}
@@ -307,17 +343,19 @@
} elsif ($editidx) {
# Just editing pages so we get folder list from html directory
- $dir = join('/', $HTML_DIR, $list);
- if (!opendir(DIR, $dir)) {
- warn qq/Unable to open "$dir": $!/;
- next;
- }
- @months = grep { /^$folder_regex$/o } readdir(DIR);
- closedir(DIR);
+ if (!@folders) {
+ $dir = join('/', $HTML_DIR, $list);
+ if (!opendir(DIR, $dir)) {
+ warn qq/Unable to open "$dir": $!/;
+ next;
+ }
+ @months = grep { /^$folder_regex$/o } readdir(DIR);
+ closedir(DIR);
- foreach $mon (@months) {
- $mondir = join('/', $dir, $mon);
- push(@folders, $mondir);
+ foreach $mon (@months) {
+ $mondir = join('/', $dir, $mon);
+ push(@folders, $mondir);
+ }
}
next if (!@folders);
print "Editidx Folders: ", join(', ', @folders), "\n" if $debug;
@@ -326,7 +364,7 @@
$htmldir = join('/', $HTML_DIR, $list);
if ($rebuild) {
- clean_html_archive($htmldir, $keepsearch);
+ clean_html_archive($htmldir, $keepsearch, @opt_periods);
}
mkdir($htmldir, 0777);
@@ -377,8 +415,6 @@
if ($cvs) {
push(@mhaargs, '-nothread');
push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
- } else {
- push(@mhaargs, '-thread');
}
if ($list =~ /^\./) {
push(@mhaargs,
@@ -423,6 +459,7 @@
push(@months, $mon);
}
+ my @nmz_mondir = ( );
my $cur_msg_cnt;
for ($i=0; $i < @folders; ++$i) {
$folder = $folders[$i];
@@ -463,11 +500,14 @@
if $debug;
next;
}
+ push(@nmz_mondir, $mondir);
+ }
+ if (!$disable_search && !$nosearch && scalar(@nmz_mondir)) {
# update search index
# The -Y option is used so we do not have to process all months
# to update index.
- if (!$keepsearch && !$nosearch && !$disable_search) {
+ if (!$keepsearch) {
my @nmzargs = (
$MKNMZ,
'--mhonarc', # only do mhonarc pages
@@ -479,7 +519,7 @@
if (!$debug && !$rebuild) {
push(@nmzargs, '--quiet');
}
- push(@nmzargs, $mondir);
+ push(@nmzargs, @nmz_mondir);
print "Search Index Command: ", join(" ", @nmzargs), "\n" if $debug;
if (system(@nmzargs)) {
@@ -699,25 +739,43 @@
#
sub clean_html_archive {
my $dir = shift; # Directory of archive
- my $ks = shift; # Flag is search index files should be preserved
- if (!$ks) {
+ my $ks = shift; # Flag if search index files should be preserved
+ my @folders = @_; # Only remove specified periods
+ local $_;
+
+ if (!$ks && !@folders) {
# delete everything
- print "Removing $htmldir\n" if $debug;
+ print qq/Removing "$dir"...\n/ if $debug;
system('/bin/rm', '-r', $dir);
return;
}
- # keep search index, so must delete each period sub-directory
- local(*DIR);
- opendir(DIR, $dir) ||
- die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
- my @subdirs = map { join('/',$dir,$_) }
- grep { /^$folder_regex$/o } readdir(DIR);
- closedir(DIR);
- my $subdir;
- foreach $subdir (@subdirs) {
- print "Removing $subdir\n" if $debug;
- system('/bin/rm', '-r', $subdir);
+ if (!$ks) {
+ # remove namazu search index files
+ print qq/Removing search index files for "$dir"...\n/ if $debug;
+ system("/bin/rm $dir/NMZ.*");
+ }
+
+ # Delete each period sub-directory
+ if (@folders) {
+ foreach (@folders) {
+ my $subdir = join('/', $dir, $_);
+ print qq/Removing "$subdir"...\n/ if $debug;
+ system('/bin/rm', '-r', $subdir);
+ }
+
+ } else {
+ local(*DIR);
+ opendir(DIR, $dir) ||
+ die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
+ my @subdirs = map { join('/',$dir,$_) }
+ grep { /^$folder_regex$/o } readdir(DIR);
+ closedir(DIR);
+ my $subdir;
+ foreach $subdir (@subdirs) {
+ print qq/Removing "$subdir"...\n/ if $debug;
+ system('/bin/rm', '-r', $subdir);
+ }
}
}
@@ -1066,6 +1124,20 @@
Do not update search indexes.
+=item C<-period> I<period>
+
+Restrict operations to specified time period. This option is
+applicable when C<-rebuild> or C<-editidx> is specified to restrict
+processing to a given period of an archive, especially for large
+archives in order to avoid complete rebuilds.
+
+This option can be specified multiple times. For example:
+
+ web-archive -rebuild -period 2003-11 -period 2003-10 ...
+
+If no list names are provided, the specified periods apply to
+all archives.
+
=item C<-rebuild>
Rebuild archives from scratch.
@@ -1138,7 +1210,7 @@
=head1 VERSION
-$Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
+$Id: web-archive,v 1.45 2003/11/26 20:02:14 ehood Exp $
=head1 AUTHOR
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-COMMITS