mhonarc-commits
[Top] [All Lists]

mharc/bin web-archive

2003-11-26 13:04:05
CVSROOT:	/cvsroot/mhonarc
Module name:	mharc
Branch: 	
Changes by:	Earl Hood <earl(_at_)earlhood(_dot_)com>	03/11/26 15:02:15

Modified files:
	bin            : web-archive 

Log message:
	Added -period command-line option.

Patches:
Index: mharc/bin/web-archive
diff -u mharc/bin/web-archive:1.44 mharc/bin/web-archive:1.45
--- mharc/bin/web-archive:1.44	Sat Aug  9 13:56:05 2003
+++ mharc/bin/web-archive	Wed Nov 26 15:02:14 2003
@@ -1,7 +1,7 @@
 #!/usr/local/bin/perl
 ##---------------------------------------------------------------------------##
 ##  File:
-##	$Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
+##	$Id: web-archive,v 1.45 2003/11/26 20:02:14 ehood Exp $
 ##  Description:
 ##	Updates/creates web archives from mailbox archives.
 ##	Run script with '-man' option to view manpage for this program.
@@ -82,6 +82,7 @@
     'mtimeage=i',	# Modify time age of a mailbox file to be considered
 			# for processing.
     'nosearch', 	# Do not update search indexes.
+    'period=s@',	# Periods to process
     'rebuild',		# Rebuild archives from scratch.
     'rooturl=s',	# Root URL to archives.
     'searchcgi=s',	# Search CGI URL.
@@ -235,7 +236,7 @@
     closedir(DIR);
   }
 
-  my(@months, @folders);
+  my(@months, @folders, @opt_periods);
   my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
      $folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
      $disable_search, $listname, $short_title);
@@ -248,15 +249,42 @@
     $listname = $list;
     $cvs = ($listname =~ s/\.CVS$//);
 
-    if (!$editidx && !$editrootidx) {
-      # Get list of input mailboxes to process
+    if ($opt{'period'}) {
+      # List of periods explicitly specified on command-line
+      foreach $mon (@{$opt{'period'}}) {
+	if ($mon !~ /^[\d\-]+$/) {
+	  warn qq/Warning: "$mon" is not a valid period specification\n/;
+	  next;
+	}
+	push(@opt_periods, $mon);
 
-      $dir = join('/', $MBOX_DIR, $list);
-      if (!opendir(DIR, $dir)) {
-	warn qq/Unable to open "$dir": $!/;
-	next;
+	if ($editidx) {
+	  # if just editing pages, we check against HTML archive directories
+	  $mondir = join('/', $HTML_DIR, $list, $mon);
+	  if (-e $mondir) {
+	    push(@folders, $mondir);
+	    next;
+	  }
+	} else {
+	  # else, we check against raw mailbox files
+	  $mondir = join('/', $MBOX_DIR, $list, $mon);
+	  if (-e $mondir) {
+	    push(@folders, $mondir);
+	    next;
+	  }
+	  if (-e $mondir.'.gz') {
+	    push(@folders, $mondir.'.gz');
+	    next;
+	  }
+	}
+	warn qq/Warning: "$mondir" does not exist\n/;
       }
 
+      # if specified periods do not exist, skip to next archive
+      next  if (!@folders);
+    }
+
+    if (!$editidx && !$editrootidx) {
       # create .noraw file indicator if no-raw-link specified
       my $no_raw_file = join('/', $dir, '.noraw');
       my $no_raw_htaccess = join('/', $dir, '.htaccess');
@@ -285,20 +313,28 @@
 	}
       }
 
-      @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
-      closedir(DIR);
-      print "Mboxes: ", join(', ', @months), "\n"  if $debug;
-
-      foreach $mon (@months) {
-	$mondir = join('/', $dir, $mon);
-	if ($rebuild) {
-	  push(@folders, $mondir);
+      # Get list of input mailboxes to process if not specifically provided.
+      if (!@folders) {
+	$dir = join('/', $MBOX_DIR, $list);
+	if (!opendir(DIR, $dir)) {
+	  warn qq/Unable to open "$dir": $!/;
 	  next;
 	}
-	$mtime = (stat($mondir))[9];
-	print "$mondir mtime: $mtime\n"  if $debug;
-	if (($time - $mtime) < $MTIME_AGE) {
-	  push(@folders, $mondir);
+	@months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
+	closedir(DIR);
+	print "Mboxes: ", join(', ', @months), "\n"  if $debug;
+
+	foreach $mon (@months) {
+	  $mondir = join('/', $dir, $mon);
+	  if ($rebuild) {
+	    push(@folders, $mondir);
+	    next;
+	  }
+	  $mtime = (stat($mondir))[9];
+	  print "$mondir mtime: $mtime\n"  if $debug;
+	  if (($time - $mtime) < $MTIME_AGE) {
+	    push(@folders, $mondir);
+	  }
 	}
       }
 
@@ -307,17 +343,19 @@
 
     } elsif ($editidx) {
       # Just editing pages so we get folder list from html directory
-      $dir = join('/', $HTML_DIR, $list);
-      if (!opendir(DIR, $dir)) {
-	warn qq/Unable to open "$dir": $!/;
-	next;
-      }
-      @months = grep { /^$folder_regex$/o } readdir(DIR);
-      closedir(DIR);
+      if (!@folders) {
+	$dir = join('/', $HTML_DIR, $list);
+	if (!opendir(DIR, $dir)) {
+	  warn qq/Unable to open "$dir": $!/;
+	  next;
+	}
+	@months = grep { /^$folder_regex$/o } readdir(DIR);
+	closedir(DIR);
 
-      foreach $mon (@months) {
-	$mondir = join('/', $dir, $mon);
-	push(@folders, $mondir);
+	foreach $mon (@months) {
+	  $mondir = join('/', $dir, $mon);
+	  push(@folders, $mondir);
+	}
       }
       next  if (!@folders);
       print "Editidx Folders: ", join(', ', @folders), "\n"  if $debug;
@@ -326,7 +364,7 @@
 
     $htmldir = join('/', $HTML_DIR, $list);
     if ($rebuild) {
-      clean_html_archive($htmldir, $keepsearch);
+      clean_html_archive($htmldir, $keepsearch, @opt_periods);
     }
     mkdir($htmldir, 0777);
 
@@ -377,8 +415,6 @@
       if ($cvs) {
 	push(@mhaargs, '-nothread');
 	push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
-      } else {
-	push(@mhaargs, '-thread');
       }
       if ($list =~ /^\./) {
 	push(@mhaargs,
@@ -423,6 +459,7 @@
 	push(@months, $mon);
       }
 
+      my @nmz_mondir = ( );
       my $cur_msg_cnt;
       for ($i=0; $i < @folders; ++$i) {
         $folder = $folders[$i];
@@ -463,11 +500,14 @@
 	      if $debug;
 	  next;
 	}
+	push(@nmz_mondir, $mondir);
+      }
 
+      if (!$disable_search && !$nosearch && scalar(@nmz_mondir)) {
 	# update search index
 	# The -Y option is used so we do not have to process all months
 	# to update index.
-	if (!$keepsearch && !$nosearch && !$disable_search) {
+	if (!$keepsearch) {
 	  my @nmzargs = (
 		 $MKNMZ,
 		 '--mhonarc',		# only do mhonarc pages
@@ -479,7 +519,7 @@
 	  if (!$debug && !$rebuild) {
 	    push(@nmzargs, '--quiet');
 	  }
-	  push(@nmzargs, $mondir);
+	  push(@nmzargs, @nmz_mondir);
 	  print "Search Index Command: ", join(" ", @nmzargs), "\n"  if $debug;
 
 	  if (system(@nmzargs)) {
@@ -699,25 +739,43 @@
 #
 sub clean_html_archive {
   my $dir = shift;    # Directory of archive
-  my $ks  = shift;    # Flag is search index files should be preserved
-  if (!$ks) {
+  my $ks  = shift;    # Flag if search index files should be preserved
+  my @folders = @_;   # Only remove specified periods
+  local $_;
+
+  if (!$ks && !@folders) {
     # delete everything
-    print "Removing $htmldir\n"  if $debug;
+    print qq/Removing "$dir"...\n/  if $debug;
     system('/bin/rm', '-r', $dir);
     return;
   }
 
-  # keep search index, so must delete each period sub-directory
-  local(*DIR);
-  opendir(DIR, $dir) ||
-      die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
-  my @subdirs = map { join('/',$dir,$_) }
-		    grep { /^$folder_regex$/o } readdir(DIR);
-  closedir(DIR);
-  my $subdir;
-  foreach $subdir (@subdirs) {
-    print "Removing $subdir\n"  if $debug;
-    system('/bin/rm', '-r', $subdir);
+  if (!$ks) {
+    # remove namazu search index files
+    print qq/Removing search index files for "$dir"...\n/  if $debug;
+    system("/bin/rm $dir/NMZ.*");
+  }
+
+  # Delete each period sub-directory
+  if (@folders) {
+    foreach (@folders) {
+      my $subdir = join('/', $dir, $_);
+      print qq/Removing "$subdir"...\n/  if $debug;
+      system('/bin/rm', '-r', $subdir);
+    }
+
+  } else {
+    local(*DIR);
+    opendir(DIR, $dir) ||
+	die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
+    my @subdirs = map { join('/',$dir,$_) }
+		      grep { /^$folder_regex$/o } readdir(DIR);
+    closedir(DIR);
+    my $subdir;
+    foreach $subdir (@subdirs) {
+      print qq/Removing "$subdir"...\n/  if $debug;
+      system('/bin/rm', '-r', $subdir);
+    }
   }
 }
 
@@ -1066,6 +1124,20 @@
 
 Do not update search indexes.
 
+=item C<-period> I<period>
+
+Restrict operations to specified time period.  This option is
+applicable when C<-rebuild> or C<-editidx> is specified to restrict
+processing to a given period of an archive, especially for large
+archives in order to avoid complete rebuilds.
+
+This option can be specified multiple times.  For example:
+
+  web-archive -rebuild -period 2003-11 -period 2003-10 ...
+
+If no list names are provided, the specified periods apply to
+all archives.
+
 =item C<-rebuild>
 
 Rebuild archives from scratch.
@@ -1138,7 +1210,7 @@
 
 =head1 VERSION
 
-$Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
+$Id: web-archive,v 1.45 2003/11/26 20:02:14 ehood Exp $
 
 =head1 AUTHOR
 

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-COMMITS