mhonarc-commits
[Top] [All Lists]

CVS: mharc/bin web-archive,1.44,1.45

2004-12-27 16:52:35
Update of /home/users/mhonarc.org/ehood/cvs/mhonarc/mharc/bin
In directory denethor.mallorn.com:/tmp/cvs-serv6944/bin

Modified Files:
	web-archive 
Log Message:
* bin/web-archive:
  . The -period option has been added to restrict operations to
    a specific time period in an archive, or archives.  This is handy
    for rebuild operations where only specific periods need to be
    rebuilt vs the entire archive.

  . Fixed unreported bug in setting MHonArc's DATEFIELDS based upon
    MSG_DATE_FIELDS in config.sh.

* lib/default.mrc.in.dist:
  . New default resource file.  Before, the use of this file was
    a recommendation of the installation docs, but now it is part
    of the distribution.  Local resource customizations should be
    done in this file vs lib/common.mrc.in.

* lib/common.mrc.in.dist:
  . Added setting of Dublin Core elements in message pages.

* lib/config.sh.dist:
  . MHA_RC changed to $SW_ROOT/lib/default.mrc.



Index: web-archive
===================================================================
RCS file: /home/users/mhonarc.org/ehood/cvs/mhonarc/mharc/bin/web-archive,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -r1.44 -r1.45
*** web-archive	9 Aug 2003 17:56:05 -0000	1.44
--- web-archive	27 Dec 2004 23:52:27 -0000	1.45
***************
*** 83,86 ****
--- 83,87 ----
  			# for processing.
      'nosearch', 	# Do not update search indexes.
+     'period=s@',	# Periods to process
      'rebuild',		# Rebuild archives from scratch.
      'rooturl=s',	# Root URL to archives.
***************
*** 236,240 ****
    }
  
!   my(@months, @folders);
    my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
       $folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
--- 237,241 ----
    }
  
!   my(@months, @folders, @opt_periods);
    my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
       $folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
***************
*** 249,262 ****
      $cvs = ($listname =~ s/\.CVS$//);
  
!     if (!$editidx && !$editrootidx) {
!       # Get list of input mailboxes to process
  
!       $dir = join('/', $MBOX_DIR, $list);
!       if (!opendir(DIR, $dir)) {
! 	warn qq/Unable to open "$dir": $!/;
! 	next;
        }
  
        # create .noraw file indicator if no-raw-link specified
        my $no_raw_file = join('/', $dir, '.noraw');
        my $no_raw_htaccess = join('/', $dir, '.htaccess');
--- 250,291 ----
      $cvs = ($listname =~ s/\.CVS$//);
  
!     if ($opt{'period'}) {
!       # List of periods explicitly specified on command-line
!       foreach $mon (@{$opt{'period'}}) {
! 	if ($mon !~ /^[\d\-]+$/) {
! 	  warn qq/Warning: "$mon" is not a valid period specification\n/;
! 	  next;
! 	}
! 	push(@opt_periods, $mon);
  
! 	if ($editidx) {
! 	  # if just editing pages, we check against HTML archive directories
! 	  $mondir = join('/', $HTML_DIR, $list, $mon);
! 	  if (-e $mondir) {
! 	    push(@folders, $mondir);
! 	    next;
! 	  }
! 	} else {
! 	  # else, we check against raw mailbox files
! 	  $mondir = join('/', $MBOX_DIR, $list, $mon);
! 	  if (-e $mondir) {
! 	    push(@folders, $mondir);
! 	    next;
! 	  }
! 	  if (-e $mondir.'.gz') {
! 	    push(@folders, $mondir.'.gz');
! 	    next;
! 	  }
! 	}
! 	warn qq/Warning: "$mondir" does not exist\n/;
        }
  
+       # if specified periods do not exist, skip to next archive
+       next  if (!@folders);
+     }
+ 
+     if (!$editidx && !$editrootidx) {
        # create .noraw file indicator if no-raw-link specified
+       $dir = join('/', $MBOX_DIR, $list);
        my $no_raw_file = join('/', $dir, '.noraw');
        my $no_raw_htaccess = join('/', $dir, '.htaccess');
***************
*** 286,303 ****
        }
  
!       @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
!       closedir(DIR);
!       print "Mboxes: ", join(', ', @months), "\n"  if $debug;
! 
!       foreach $mon (@months) {
! 	$mondir = join('/', $dir, $mon);
! 	if ($rebuild) {
! 	  push(@folders, $mondir);
  	  next;
  	}
! 	$mtime = (stat($mondir))[9];
! 	print "$mondir mtime: $mtime\n"  if $debug;
! 	if (($time - $mtime) < $MTIME_AGE) {
! 	  push(@folders, $mondir);
  	}
        }
--- 315,340 ----
        }
  
!       # Get list of input mailboxes to process if not specifically provided.
!       if (!@folders) {
! 	$dir = join('/', $MBOX_DIR, $list);
! 	if (!opendir(DIR, $dir)) {
! 	  warn qq/Unable to open "$dir": $!/;
  	  next;
  	}
! 	@months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
! 	closedir(DIR);
! 	print "Mboxes: ", join(', ', @months), "\n"  if $debug;
! 
! 	foreach $mon (@months) {
! 	  $mondir = join('/', $dir, $mon);
! 	  if ($rebuild) {
! 	    push(@folders, $mondir);
! 	    next;
! 	  }
! 	  $mtime = (stat($mondir))[9];
! 	  print "$mondir mtime: $mtime\n"  if $debug;
! 	  if (($time - $mtime) < $MTIME_AGE) {
! 	    push(@folders, $mondir);
! 	  }
  	}
        }
***************
*** 308,322 ****
      } elsif ($editidx) {
        # Just editing pages so we get folder list from html directory
!       $dir = join('/', $HTML_DIR, $list);
!       if (!opendir(DIR, $dir)) {
! 	warn qq/Unable to open "$dir": $!/;
! 	next;
!       }
!       @months = grep { /^$folder_regex$/o } readdir(DIR);
!       closedir(DIR);
  
!       foreach $mon (@months) {
! 	$mondir = join('/', $dir, $mon);
! 	push(@folders, $mondir);
        }
        next  if (!@folders);
--- 345,361 ----
      } elsif ($editidx) {
        # Just editing pages so we get folder list from html directory
!       if (!@folders) {
! 	$dir = join('/', $HTML_DIR, $list);
! 	if (!opendir(DIR, $dir)) {
! 	  warn qq/Unable to open "$dir": $!/;
! 	  next;
! 	}
! 	@months = grep { /^$folder_regex$/o } readdir(DIR);
! 	closedir(DIR);
  
! 	foreach $mon (@months) {
! 	  $mondir = join('/', $dir, $mon);
! 	  push(@folders, $mondir);
! 	}
        }
        next  if (!@folders);
***************
*** 327,331 ****
      $htmldir = join('/', $HTML_DIR, $list);
      if ($rebuild) {
!       clean_html_archive($htmldir, $keepsearch);
      }
      mkdir($htmldir, 0777);
--- 366,370 ----
      $htmldir = join('/', $HTML_DIR, $list);
      if ($rebuild) {
!       clean_html_archive($htmldir, $keepsearch, @opt_periods);
      }
      mkdir($htmldir, 0777);
***************
*** 378,383 ****
  	push(@mhaargs, '-nothread');
  	push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
-       } else {
- 	push(@mhaargs, '-thread');
        }
        if ($list =~ /^\./) {
--- 417,420 ----
***************
*** 392,396 ****
        }
        if (defined($config->{'MSG_DATE_FIELDS'})) {
! 	push(@mhaargs, '-datefields', $config->{'DATE_FIELDS'});
        }
        if (!$debug && !$rebuild) {
--- 429,433 ----
        }
        if (defined($config->{'MSG_DATE_FIELDS'})) {
! 	push(@mhaargs, '-datefields', $config->{'MSG_DATE_FIELDS'});
        }
        if (!$debug && !$rebuild) {
***************
*** 424,427 ****
--- 461,465 ----
        }
  
+       my @nmz_mondir = ( );
        my $cur_msg_cnt;
        for ($i=0; $i < @folders; ++$i) {
***************
*** 464,472 ****
  	  next;
  	}
  
  	# update search index
  	# The -Y option is used so we do not have to process all months
  	# to update index.
! 	if (!$keepsearch && !$nosearch && !$disable_search) {
  	  my @nmzargs = (
  		 $MKNMZ,
--- 502,513 ----
  	  next;
  	}
+ 	push(@nmz_mondir, $mondir);
+       }
  
+       if (!$disable_search && !$nosearch && scalar(@nmz_mondir)) {
  	# update search index
  	# The -Y option is used so we do not have to process all months
  	# to update index.
! 	if (!$keepsearch) {
  	  my @nmzargs = (
  		 $MKNMZ,
***************
*** 480,484 ****
  	    push(@nmzargs, '--quiet');
  	  }
! 	  push(@nmzargs, $mondir);
  	  print "Search Index Command: ", join(" ", @nmzargs), "\n"  if $debug;
  
--- 521,525 ----
  	    push(@nmzargs, '--quiet');
  	  }
! 	  push(@nmzargs, @nmz_mondir);
  	  print "Search Index Command: ", join(" ", @nmzargs), "\n"  if $debug;
  
***************
*** 700,722 ****
  sub clean_html_archive {
    my $dir = shift;    # Directory of archive
!   my $ks  = shift;    # Flag is search index files should be preserved
!   if (!$ks) {
      # delete everything
!     print "Removing $htmldir\n"  if $debug;
      system('/bin/rm', '-r', $dir);
      return;
    }
  
!   # keep search index, so must delete each period sub-directory
!   local(*DIR);
!   opendir(DIR, $dir) ||
!       die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
!   my @subdirs = map { join('/',$dir,$_) }
! 		    grep { /^$folder_regex$/o } readdir(DIR);
!   closedir(DIR);
!   my $subdir;
!   foreach $subdir (@subdirs) {
!     print "Removing $subdir\n"  if $debug;
!     system('/bin/rm', '-r', $subdir);
    }
  }
--- 741,781 ----
  sub clean_html_archive {
    my $dir = shift;    # Directory of archive
!   my $ks  = shift;    # Flag if search index files should be preserved
!   my @folders = @_;   # Only remove specified periods
!   local $_;
! 
!   if (!$ks && !@folders) {
      # delete everything
!     print qq/Removing "$dir"...\n/  if $debug;
      system('/bin/rm', '-r', $dir);
      return;
    }
  
!   if (!$ks) {
!     # remove namazu search index files
!     print qq/Removing search index files for "$dir"...\n/  if $debug;
!     system("/bin/rm $dir/NMZ.*");
!   }
! 
!   # Delete each period sub-directory
!   if (@folders) {
!     foreach (@folders) {
!       my $subdir = join('/', $dir, $_);
!       print qq/Removing "$subdir"...\n/  if $debug;
!       system('/bin/rm', '-r', $subdir);
!     }
! 
!   } else {
!     local(*DIR);
!     opendir(DIR, $dir) ||
! 	die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
!     my @subdirs = map { join('/',$dir,$_) }
! 		      grep { /^$folder_regex$/o } readdir(DIR);
!     closedir(DIR);
!     my $subdir;
!     foreach $subdir (@subdirs) {
!       print qq/Removing "$subdir"...\n/  if $debug;
!       system('/bin/rm', '-r', $subdir);
!     }
    }
  }
***************
*** 788,794 ****
    my($list, $listname, $last_updated, $dir, $latest, $info);
  
!   foreach $listname (keys %$listdef) {
      next  if $listname =~ /^\./;  # skip hidden archives
!     next  if $listdef->{'hide-from-all-lists'}[0];
  
      foreach $list ($listname, "$listname.CVS") {
--- 847,853 ----
    my($list, $listname, $last_updated, $dir, $latest, $info);
  
!   foreach $listname ($listdef->get_names) {
      next  if $listname =~ /^\./;  # skip hidden archives
!     next  if $listdef->{$listname}{'hide-from-all-lists'}[0];
  
      foreach $list ($listname, "$listname.CVS") {
***************
*** 1066,1069 ****
--- 1125,1142 ----
  
  Do not update search indexes.
+ 
+ =item C<-period> I<period>
+ 
+ Restrict operations to specified time period.  This option is
+ applicable when C<-rebuild> or C<-editidx> is specified to restrict
+ processing to a given period of an archive, especially for large
+ archives in order to avoid complete rebuilds.
+ 
+ This option can be specified multiple times.  For example:
+ 
+   web-archive -rebuild -period 2003-11 -period 2003-10 ...
+ 
+ If no list names are provided, the specified periods apply to
+ all archives.
  
  =item C<-rebuild>

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-COMMITS