Update of /home/users/mhonarc.org/ehood/cvs/mhonarc/mharc/bin
In directory denethor.mallorn.com:/tmp/cvs-serv6944/bin
Modified Files:
web-archive
Log Message:
* bin/web-archive:
. The -period option has been added to restrict operations to
a specific time period in an archive, or archives. This is handy
for rebuild operations where only specific periods need to be
rebuilt vs the entire archive.
. Fixed unreported bug in setting MHonArc's DATEFIELDS based upon
MSG_DATE_FIELDS in config.sh.
* lib/default.mrc.in.dist:
. New default resource file. Before, the use of this file was
a recommendation of the installation docs, but now it is part
of the distribution. Local resource customizations should be
done in this file vs lib/common.mrc.in.
* lib/common.mrc.in.dist:
. Added setting of Dublin Core elements in message pages.
* lib/config.sh.dist:
. MHA_RC changed to $SW_ROOT/lib/default.mrc.
Index: web-archive
===================================================================
RCS file: /home/users/mhonarc.org/ehood/cvs/mhonarc/mharc/bin/web-archive,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -r1.44 -r1.45
*** web-archive 9 Aug 2003 17:56:05 -0000 1.44
--- web-archive 27 Dec 2004 23:52:27 -0000 1.45
***************
*** 83,86 ****
--- 83,87 ----
# for processing.
'nosearch', # Do not update search indexes.
+ 'period=s@', # Periods to process
'rebuild', # Rebuild archives from scratch.
'rooturl=s', # Root URL to archives.
***************
*** 236,240 ****
}
! my(@months, @folders);
my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
$folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
--- 237,241 ----
}
! my(@months, @folders, @opt_periods);
my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
$folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
***************
*** 249,262 ****
$cvs = ($listname =~ s/\.CVS$//);
! if (!$editidx && !$editrootidx) {
! # Get list of input mailboxes to process
! $dir = join('/', $MBOX_DIR, $list);
! if (!opendir(DIR, $dir)) {
! warn qq/Unable to open "$dir": $!/;
! next;
}
# create .noraw file indicator if no-raw-link specified
my $no_raw_file = join('/', $dir, '.noraw');
my $no_raw_htaccess = join('/', $dir, '.htaccess');
--- 250,291 ----
$cvs = ($listname =~ s/\.CVS$//);
! if ($opt{'period'}) {
! # List of periods explicitly specified on command-line
! foreach $mon (@{$opt{'period'}}) {
! if ($mon !~ /^[\d\-]+$/) {
! warn qq/Warning: "$mon" is not a valid period specification\n/;
! next;
! }
! push(@opt_periods, $mon);
! if ($editidx) {
! # if just editing pages, we check against HTML archive directories
! $mondir = join('/', $HTML_DIR, $list, $mon);
! if (-e $mondir) {
! push(@folders, $mondir);
! next;
! }
! } else {
! # else, we check against raw mailbox files
! $mondir = join('/', $MBOX_DIR, $list, $mon);
! if (-e $mondir) {
! push(@folders, $mondir);
! next;
! }
! if (-e $mondir.'.gz') {
! push(@folders, $mondir.'.gz');
! next;
! }
! }
! warn qq/Warning: "$mondir" does not exist\n/;
}
+ # if specified periods do not exist, skip to next archive
+ next if (!@folders);
+ }
+
+ if (!$editidx && !$editrootidx) {
# create .noraw file indicator if no-raw-link specified
+ $dir = join('/', $MBOX_DIR, $list);
my $no_raw_file = join('/', $dir, '.noraw');
my $no_raw_htaccess = join('/', $dir, '.htaccess');
***************
*** 286,303 ****
}
! @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
! closedir(DIR);
! print "Mboxes: ", join(', ', @months), "\n" if $debug;
!
! foreach $mon (@months) {
! $mondir = join('/', $dir, $mon);
! if ($rebuild) {
! push(@folders, $mondir);
next;
}
! $mtime = (stat($mondir))[9];
! print "$mondir mtime: $mtime\n" if $debug;
! if (($time - $mtime) < $MTIME_AGE) {
! push(@folders, $mondir);
}
}
--- 315,340 ----
}
! # Get list of input mailboxes to process if not specifically provided.
! if (!@folders) {
! $dir = join('/', $MBOX_DIR, $list);
! if (!opendir(DIR, $dir)) {
! warn qq/Unable to open "$dir": $!/;
next;
}
! @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
! closedir(DIR);
! print "Mboxes: ", join(', ', @months), "\n" if $debug;
!
! foreach $mon (@months) {
! $mondir = join('/', $dir, $mon);
! if ($rebuild) {
! push(@folders, $mondir);
! next;
! }
! $mtime = (stat($mondir))[9];
! print "$mondir mtime: $mtime\n" if $debug;
! if (($time - $mtime) < $MTIME_AGE) {
! push(@folders, $mondir);
! }
}
}
***************
*** 308,322 ****
} elsif ($editidx) {
# Just editing pages so we get folder list from html directory
! $dir = join('/', $HTML_DIR, $list);
! if (!opendir(DIR, $dir)) {
! warn qq/Unable to open "$dir": $!/;
! next;
! }
! @months = grep { /^$folder_regex$/o } readdir(DIR);
! closedir(DIR);
! foreach $mon (@months) {
! $mondir = join('/', $dir, $mon);
! push(@folders, $mondir);
}
next if (!@folders);
--- 345,361 ----
} elsif ($editidx) {
# Just editing pages so we get folder list from html directory
! if (!@folders) {
! $dir = join('/', $HTML_DIR, $list);
! if (!opendir(DIR, $dir)) {
! warn qq/Unable to open "$dir": $!/;
! next;
! }
! @months = grep { /^$folder_regex$/o } readdir(DIR);
! closedir(DIR);
! foreach $mon (@months) {
! $mondir = join('/', $dir, $mon);
! push(@folders, $mondir);
! }
}
next if (!@folders);
***************
*** 327,331 ****
$htmldir = join('/', $HTML_DIR, $list);
if ($rebuild) {
! clean_html_archive($htmldir, $keepsearch);
}
mkdir($htmldir, 0777);
--- 366,370 ----
$htmldir = join('/', $HTML_DIR, $list);
if ($rebuild) {
! clean_html_archive($htmldir, $keepsearch, @opt_periods);
}
mkdir($htmldir, 0777);
***************
*** 378,383 ****
push(@mhaargs, '-nothread');
push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
- } else {
- push(@mhaargs, '-thread');
}
if ($list =~ /^\./) {
--- 417,420 ----
***************
*** 392,396 ****
}
if (defined($config->{'MSG_DATE_FIELDS'})) {
! push(@mhaargs, '-datefields', $config->{'DATE_FIELDS'});
}
if (!$debug && !$rebuild) {
--- 429,433 ----
}
if (defined($config->{'MSG_DATE_FIELDS'})) {
! push(@mhaargs, '-datefields', $config->{'MSG_DATE_FIELDS'});
}
if (!$debug && !$rebuild) {
***************
*** 424,427 ****
--- 461,465 ----
}
+ my @nmz_mondir = ( );
my $cur_msg_cnt;
for ($i=0; $i < @folders; ++$i) {
***************
*** 464,472 ****
next;
}
# update search index
# The -Y option is used so we do not have to process all months
# to update index.
! if (!$keepsearch && !$nosearch && !$disable_search) {
my @nmzargs = (
$MKNMZ,
--- 502,513 ----
next;
}
+ push(@nmz_mondir, $mondir);
+ }
+ if (!$disable_search && !$nosearch && scalar(@nmz_mondir)) {
# update search index
# The -Y option is used so we do not have to process all months
# to update index.
! if (!$keepsearch) {
my @nmzargs = (
$MKNMZ,
***************
*** 480,484 ****
push(@nmzargs, '--quiet');
}
! push(@nmzargs, $mondir);
print "Search Index Command: ", join(" ", @nmzargs), "\n" if $debug;
--- 521,525 ----
push(@nmzargs, '--quiet');
}
! push(@nmzargs, @nmz_mondir);
print "Search Index Command: ", join(" ", @nmzargs), "\n" if $debug;
***************
*** 700,722 ****
sub clean_html_archive {
my $dir = shift; # Directory of archive
! my $ks = shift; # Flag is search index files should be preserved
! if (!$ks) {
# delete everything
! print "Removing $htmldir\n" if $debug;
system('/bin/rm', '-r', $dir);
return;
}
! # keep search index, so must delete each period sub-directory
! local(*DIR);
! opendir(DIR, $dir) ||
! die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
! my @subdirs = map { join('/',$dir,$_) }
! grep { /^$folder_regex$/o } readdir(DIR);
! closedir(DIR);
! my $subdir;
! foreach $subdir (@subdirs) {
! print "Removing $subdir\n" if $debug;
! system('/bin/rm', '-r', $subdir);
}
}
--- 741,781 ----
sub clean_html_archive {
my $dir = shift; # Directory of archive
! my $ks = shift; # Flag if search index files should be preserved
! my @folders = @_; # Only remove specified periods
! local $_;
!
! if (!$ks && !@folders) {
# delete everything
! print qq/Removing "$dir"...\n/ if $debug;
system('/bin/rm', '-r', $dir);
return;
}
! if (!$ks) {
! # remove namazu search index files
! print qq/Removing search index files for "$dir"...\n/ if $debug;
! system("/bin/rm $dir/NMZ.*");
! }
!
! # Delete each period sub-directory
! if (@folders) {
! foreach (@folders) {
! my $subdir = join('/', $dir, $_);
! print qq/Removing "$subdir"...\n/ if $debug;
! system('/bin/rm', '-r', $subdir);
! }
!
! } else {
! local(*DIR);
! opendir(DIR, $dir) ||
! die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
! my @subdirs = map { join('/',$dir,$_) }
! grep { /^$folder_regex$/o } readdir(DIR);
! closedir(DIR);
! my $subdir;
! foreach $subdir (@subdirs) {
! print qq/Removing "$subdir"...\n/ if $debug;
! system('/bin/rm', '-r', $subdir);
! }
}
}
***************
*** 788,794 ****
my($list, $listname, $last_updated, $dir, $latest, $info);
! foreach $listname (keys %$listdef) {
next if $listname =~ /^\./; # skip hidden archives
! next if $listdef->{'hide-from-all-lists'}[0];
foreach $list ($listname, "$listname.CVS") {
--- 847,853 ----
my($list, $listname, $last_updated, $dir, $latest, $info);
! foreach $listname ($listdef->get_names) {
next if $listname =~ /^\./; # skip hidden archives
! next if $listdef->{$listname}{'hide-from-all-lists'}[0];
foreach $list ($listname, "$listname.CVS") {
***************
*** 1066,1069 ****
--- 1125,1142 ----
Do not update search indexes.
+
+ =item C<-period> I<period>
+
+ Restrict operations to specified time period. This option is
+ applicable when C<-rebuild> or C<-editidx> is specified to restrict
+ processing to a given period of an archive, especially for large
+ archives in order to avoid complete rebuilds.
+
+ This option can be specified multiple times. For example:
+
+ web-archive -rebuild -period 2003-11 -period 2003-10 ...
+
+ If no list names are provided, the specified periods apply to
+ all archives.
=item C<-rebuild>
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-COMMITS