mhonarc-commits
[Top] [All Lists]

CVS: mharc/bin web-archive,1.40,1.41

2003-01-10 14:58:45
Update of /cvsroot/mhonarc/mharc/bin
In directory subversions:/tmp/cvs-serv21550/bin

Modified Files:
	web-archive 
Log Message:
* bin/web-archive:
  . Added -keepsearch option to preserve the search index when
    -rebuild is specified.


Index: web-archive
===================================================================
RCS file: /cvsroot/mhonarc/mharc/bin/web-archive,v
retrieving revision 1.40
retrieving revision 1.41
diff -C2 -r1.40 -r1.41
*** web-archive	29 Sep 2002 03:41:18 -0000	1.40
--- web-archive	10 Jan 2003 21:58:37 -0000	1.41
***************
*** 68,71 ****
--- 68,72 ----
      'htmldir=s',	# Root directory for html archives.
      'htmlurl=s',	# Root URL for html archives.
+     'keepsearch!',	# Keep search index on a rebuild.
      'listsdef=s',	# Pathname to list definition file.
      'mboxdir=s',	# Root directory for mbox archives.
***************
*** 163,166 ****
--- 164,168 ----
    my $rebuild  		= $opt{'rebuild'} ||
  				$ENV{'WA_REBUILD'} || 0;
+   my $keepsearch  	= $opt{'keepsearch'};
    my $editidx  		= $opt{'editidx'} ||
  				$ENV{'WA_EDIT'} || 0;
***************
*** 186,189 ****
--- 188,193 ----
      $editrootidx = 0;
      $editallidx = 0;
+   } else {
+     $keepsearch = 0;
    }
    $editidx = 1  if $editidxonly;
***************
*** 320,325 ****
      $htmldir = join('/', $HTML_DIR, $list);
      if ($rebuild) {
!       print "Removing $htmldir\n"  if $debug;
!       system('/bin/rm', '-r', $htmldir);
      }
      mkdir($htmldir, 0777);
--- 324,328 ----
      $htmldir = join('/', $HTML_DIR, $list);
      if ($rebuild) {
!       clean_html_archive($htmldir, $keepsearch);
      }
      mkdir($htmldir, 0777);
***************
*** 457,461 ****
  	# The -Y option is used so we do not have to process all months
  	# to update index.
! 	if (!$nosearch && !$disable_search) {
  	  my @nmzargs = (
  		 $MKNMZ,
--- 460,464 ----
  	# The -Y option is used so we do not have to process all months
  	# to update index.
! 	if (!$keepsearch && !$nosearch && !$disable_search) {
  	  my @nmzargs = (
  		 $MKNMZ,
***************
*** 685,688 ****
--- 688,717 ----
  }
  
+ ##  Remove HTML archive
+ #
+ sub clean_html_archive {
+   my $dir = shift;    # Directory of archive
+   my $ks  = shift;    # Flag if search index files should be preserved
+   if (!$ks) {
+     # delete everything
+     print "Removing $htmldir\n"  if $debug;
+     system('/bin/rm', '-r', $dir);
+     return;
+   }
+ 
+   # keep search index, so must delete each period sub-directory
+   local(*DIR);
+   opendir(DIR, $dir) ||
+       die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
+   my @subdirs = map { join('/',$dir,$_) }
+ 		    grep { /^$folder_regex$/o } readdir(DIR);
+   closedir(DIR);
+   my $subdir;
+   foreach $subdir (@subdirs) {
+     print "Removing $subdir\n"  if $debug;
+     system('/bin/rm', '-r', $subdir);
+   }
+ }
+ 
  ##  Retrieve the list info URL.
  #
***************
*** 948,951 ****
--- 977,991 ----
  If not specified, the value of the C<INFO_URL> variable in
  C<config.sh> is used, else it defaults to "C<I<-rooturl>/info>".
+ 
+ =item C<-keepsearch>
+ 
+ Preserve search index if C<-rebuild> is specified.  This option
+ is handy if all that is desired is to rebuild the HTML archives
+ from the raw data since the overhead of rebuilding the search indexes
+ will be avoided.
+ 
+ B<CAUTION:> Do not use C<-keepsearch> if you have removed messages
+ from the raw mail archives since resulting HTML message pages may
+ have different URIs than what is stored within the search index.
  
  =item C<-listsdef> I<pathname>

---------------------------------------------------------------------
To sign off from this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV