#!/usr/local/bin/perl -w
##---------------------------------------------------------------------------##
##  File:
##	$Id: mk-procmailrc,v 1.16 2002/08/01 01:18:46 ehood Exp $
##  Description:
##	Program to create a procmail recipe file from lists.txt.
##	Run script with '-man' option to view manpage for this program.
##---------------------------------------------------------------------------##
##  Copyright (C) 2001-2002	Earl Hood <earl@earlhood.com>
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##  
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##  
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##  02111-1307, USA
##---------------------------------------------------------------------------##

package MHArc::mk_procmailrc;

my $Dir;
BEGIN {
  $Dir = `dirname $0`; chomp $Dir;
}
use lib "$Dir/../lib";  # Add relative lib to search path

use MHArc::Config;
my $config = MHArc::Config->load("$Dir/../lib/config.sh");

use Getopt::Long;
use MHArc::ListDef;
use MHArc::Util qw( usage );

MAIN: {
  my @htaccess = ();
  my %opt = ( );
  my $clstatus = GetOptions(\%opt,
    'catch-archive=s',
    'disable-catch-archive!',
    'final-dest=s',
    'home=s',
    'mbox-dir=s',
    'msgid-cache-size=i',
    'out=s',
    'procmail-path=s',

    'verbose!',
    'help',
    'man'
  );
  usage(0) unless $clstatus;
  usage(1) if $opt{'help'};
  usage(2) if $opt{'man'};

  my $basedir       = $opt{'home'} ||
		      $config->{'SW_ROOT'} ||
		      "$Dir/..";
  my $mbox_dir	    = $opt{'mbox-dir'} ||
		      $config->{'MBOX_DIR'} ||
		      join('/',$basedir,'mbox');
  my $out_file      = $opt{'out'} ||
		      $config->{'PROCMAILRC'} ||
		      join('/', $basedir, 'procmailrc.mharc');
  my $procmail_path = $opt{'procmail-path'} ||
		      $config->{'PROCMAIL_PATH'};
  my $cache_size    = $opt{'msgid-cache-size'} ||
		      $config->{'MSGID_CACHE_SIZE'} ||
		      16384;
  my $catch_arc     = $opt{'catch-archive'} ||
		      $config->{'CATCH_ARCHIVE'} ||
		      '.catch';
  my $nocatch       = defined($opt{'disable-catch-archive'}) ?
			  $opt{'disable-catch-archive'} :
			  ($config->{'DISABLE_CATCH_ARCHIVE'} || 0);
  my $final_dest    = $opt{'final-dest'} ||
		      $config->{'FINAL_MSG_DESTINATION'} ||
		      '/dev/null';

  ## Read lists definition file
  my $file = shift(@ARGV) ||
	     $config->{'LISTS_DEF_FILE'} ||
	     "$basedir/lib/lists.def";
  my $listdef = MHArc::ListDef->new($file);

  my $extract_date_prg = join('/', $basedir, 'bin', 'extract-mesg-date');

  local(*OUTFILE);
  my $outfh;
  if (!defined($out_file) || ($out_file eq "") || ($out_file eq '-')) {
    $outfh = \*STDOUT;
  } else {
    open(OUTFILE, ">$out_file") ||
	die qq/ERROR: Unable to create "$out_file": $!\n/;
    $outfh = \*OUTFILE;
  }

  ## Print procmailrc header
  print $outfh <<EOT;
##======================================================================
##	!!AUTO-CREATED, DO NOT EDIT!!
##	Procmail resource file for MHonArc archives
##======================================================================
##	This recipe is only responsible for storing messages within
##	mail folders.  A separate process will be used to generate
##	HTML archives.
##======================================================================

SHELL=/bin/sh
LINEBUF=4096
UMASK=133
PATH=$procmail_path
BASEDIR=$basedir

LOGFILE=\$BASEDIR/log/procmail.log

## Do alot of logging?
#VERBOSE=yes

## Should deliveries be logged?
LOGABSTRACT=yes

## Root path to mail folders
MBOXROOT=$mbox_dir

## Flag if a list was matched
HAVEMATCH=no

##======================================================================
##  Start Rules
##======================================================================

## Avoid duplicate messages
:0 Wh: \$BASEDIR/msgid.lock
| formail -D $cache_size \$BASEDIR/msgid.cache

EOT

  ## Print recipies
  my ($name,
      $cvs_prefix,
      $check_cvs,
      $addr,
      $period,
      $pm_conditions,
      $pm_copy,
      $time_fmt);
  my (@addr);
  my (@from_addr);
  my ($str);

  foreach $name (sort keys %$listdef) {
    if (defined($listdef->{$name}{'address'})) {
      @addr = @{$listdef->{$name}{'address'}};
    } else {
      @addr = ( );
    }
    if (defined($listdef->{$name}{'from-address'})) {
      @from_addr = @{$listdef->{$name}{'from-address'}};
    } else {
      @from_addr = ( )
    }
    if (!scalar(@addr) && !scalar(@from_addr)) {
      # no addresses defined
      warn qq/Warning: No addresses defined for '$name'\n/;
      next;
    }

    # create procmail regex for list
    $pm_conditions  = '* (';
    if (scalar(@addr)) {
      $pm_conditions .= '^TO_';
      $pm_conditions .= '('  if (scalar(@addr) > 1);
      $pm_conditions .= join('|', @addr);
      $pm_conditions .= ')'  if (scalar(@addr) > 1);
      $pm_conditions .= '|(^(Delivered-To:|List-Post:).*';
      $pm_conditions .= '('  if (scalar(@addr) > 1);
      $pm_conditions .= join('|', @addr);
      $pm_conditions .= ')'  if (scalar(@addr) > 1);
      $pm_conditions .= ')';
    }
    if (scalar(@from_addr)) {
      $pm_conditions .= '|('  if (scalar(@addr));
      $pm_conditions .= '^From:(.*[^-a-zA-Z0-9_.])?';
      $pm_conditions .= '('  if (scalar(@from_addr) > 1);
      $pm_conditions .= join('|', @from_addr);
      $pm_conditions .= ')'  if (scalar(@from_addr) > 1);
      $pm_conditions .= ')'  if (scalar(@addr));
    }
    $pm_conditions .= ')';

    if (defined($listdef->{$name}{'procmail-condition'})) {
      foreach $str (@{$listdef->{$name}{'procmail-condition'}}) {
	$pm_conditions .= "\n$str";
      }
    }

    # check if doing monthly or yearly archives
    $period = lc($listdef->{$name}{'period'}[0]) || 'month';
    $period = 'month'  if ($name eq $catch_arc);
    if ($period eq 'year') {
      $time_fmt = '%Y';
    } else {
      $time_fmt = '%Y-%m';
    }

    # check if rule should not be final if matched
    if ($listdef->{$name}{'final'}) {
      $pm_copy = '';
    } else {
      $pm_copy = ' c';
    }

    # check if separating out cvs commits
    if ($check_cvs = $listdef->{$name}{'cvs-commits'}[0]) {
      $cvs_prefix = $listdef->{$name}{'cvs-subject-prefix'}[0] || 'CVS commit';
      foreach $addr (@addr) {
	print $outfh <<EOT;
## $name (CVS)
:0
$pm_conditions
* ^Subject: $cvs_prefix
{
  :0 Wi
  CMDEXEC_=| mkdir -m 755 -p \$MBOXROOT/$name.CVS

  :0 Wi
  MESGDATE_=| $extract_date_prg -fmt '$time_fmt'

  :0:
  \$MBOXROOT/$name.CVS/\$MESGDATE_
}

EOT
      }
    } # End: $check_cvs

    # address receipe
    print $outfh <<EOT;
## $name
:0
$pm_conditions
{
  :0 Wi
  HAVEMATCH=|echo yes

  :0 Wi
  CMDEXC_=| mkdir -m 755 -p \$MBOXROOT/$name

  :0 Wi
  MESGDATE_=| $extract_date_prg -fmt '$time_fmt'

  :0$pm_copy:
  \$MBOXROOT/$name/\$MESGDATE_
}

EOT
  }

  if (!$nocatch) {
    print $outfh <<EOT;
##======================================================================
##	No Matches
##======================================================================
:0
* HAVEMATCH ?? no
{
  :0 Wic
  * ? test ! -d \$MBOXROOT/$catch_arc
  | mkdir -m 755 -p \$MBOXROOT/$catch_arc

  :0 Wi
  MESGDATE_=| $extract_date_prg -fmt '%Y-%m'

  :0
  \$MBOXROOT/$catch_arc/\$MESGDATE_
}

EOT
  }

  print $outfh <<EOT;
##======================================================================
##	Final destination, typically discard since at least one of the
##	above rules should have matched.  But if paranoid, you can
##	have copies delivered to a real mailbox by the -final-dest
##	option.
##======================================================================
:0
$final_dest
EOT

  if ($outfh != \*STDOUT) {
    close($outfh);
  }

} # End: MAIN

__END__

=head1 NAME

mk-procmailrc - Generate procmailrc from lists.def

=head1 SYNOPSIS

  mk-procmailrc
  mk-procmailrc [options]

=head1 DESCRIPTION

This program is part of MHArc, a web-based mail archiving system.  This
program has the responsibility of generating the main procmailrc
used by the C<filter-spool> program.  The procmailrc is generated
from C<I<mharc-root>/lib/lists.def>.

This program is typically invoked from calling C<make> from
the MHArc root directory with configuration options specified
in C<I<mharc-root>/lib/config.sh>.

=head1 LIST DEFINITION FILE

The list definition file, C<I<mharc-root>/lib/lists.def>, is
read to generate the procmailrc used by B<filter-spool> to
filter messages into raw mail archives.  The format of the file
is intended to be simple and more convenient than writing the
procmailrc file yourself.

The basic format of the file is as follows:

=over

=item *

Any blank links or lines starting with a C<#> are ignored.

=item *

Lines with the following format:

  Option-Name: Option-Value

is an option to be read by this program.

=back

=head2 lists.def Supported Options

=over

=item Name

Starts, and defines, the name of the list to be archived.  This name
will be used as the directory name to contain the archives of the list
and the short title.  Typical notation is to use the list address,
but this is not required, especially if the archive is a combination
of multiple lists, or it is prefered to use a more abstract name
in case the list address may change in the future.

=item Address

Mail address of the list.  This option can be specified multiple times.
Multiple listings can be use to capture the multiple addresses that denote
the list.  This is useful if the list ever moves to a new address.

Multiple listings can also represent multiple lists that will be
archived together.

=item From-Address

Mail address of the list.  This option is an alternative to C<Address>
for cases where the list can only be donoted by the C<From: > field
of the message.  Some lists specify the C<To: > to be the actual
subscribed address to the list instead of to the address of the
list itself.  This is fairly common for one-way lists like newsletters
where subscribers receive list messages but cannot post to the list.

This option can be specified multiple times.  Multiple listings can be
use to capture the multiple addresses that denote the list.  This is
useful if the list ever moves to a new address.  Multiple listings
can also represent multiple lists that will be archived together.

=item CVS-Commits

Boolean option (C<0> or C<1>) if the CVS commits to the list should be
separated out into a separate archive.

This option is useful for development lists that have all CVS commits
of a project mailed to the list.  This option allows the CVS commits
to be separated out into a separate archive so it will not pollute
the main archive that contains discussions.

=item CVS-Subject-Prefix

Option specifying the C<Subject:> prefix used to denote CVS commits to
the list.  Used if CVS-Commits is specified.

=item Description

Brief description of list.  This will be used as the title of archive
index pages.

=item Final

Boolean option (C<0> or C<1>) if generated rule should be final
if matched.  I.e.  If a message matches, further rules will not
be examined.

This option can be used to short-circuit messages from being
archived in other archives.  For example, you may want to catch
messages that have been cross-posted to a special address to
only be archived in the special address archive and not in the
regular archives.

Another example use of this option is if you use the special C<Name>
"C<.catch>" (or the C<-catch-archive> setting described in L<OPTIONS>).
Using "C<.catch>" is handy for Final definitions to pre-catch messages
that should not be placed in any list archive.

=item No-Raw-Link

Boolean option (C<0> or C<1>) if link to raw archive from period index
page should be created.  If set to C<1>, no link will be created.
The default is to create a link to the raw archives.

You may want to enable this option if your HTML archives have
been customized to obscure address to prevent address harvesting.

B<Note:> If this is the case, make sure to change the permission of
the C<I<mharc-root>/mbox> directory so it is not readable by the
HTTP server process so someone cannot backdoor into the raw archives.
This is generally done by removing world read permission.  Make sure
the permission do allow for the MHArc archiving system to read and
write to the directory.

You could also explicitly deny access to the directory via the HTTP
server configuration file.  This is the recommended approach since
it gives you additional protection in case the C<I<mharc-root>/mbox>
directory permissions are unintentionally changed to world readable.

=item No-Search

Boolean option (C<I<mharc-root>/mbox> directory) if searching should
be disabled.  If set to C<1>, no search index will be created for
the archive and the C<$SEARCH-FORM$> custom resource variable will
be set to the empty string.

=item MHonArc-Options

Additional command-line options to pass to MHonArc.  Command-line
options are specified as they would be at the shell prompt.

=item Period

If archive is a yearly or monthly archive.  Legal values are C<year>
or C<month>.  If Period is not defined, C<month> is the default.

=item Procmail-Condition

Additional conditions to apply to base address check.  The condition
must be legal procmailrc syntax and should include any prefixing C<*>,
C<!>, et. al.

B<Note:> Care should be used when using this option, especially if
C<CVS-Commits> is true.  When C<CVS-commits> is true, an additionaly
rule is already added to check for the C<CVS-Subject-Prefix> setting.

=back

=head2 lists.def Example

  ##  In this definition, we define multiple addresses to check.
  Name: mhonarc-users
  Description: MHonArc Mailing List
  Address: mhonarc-users@mhonarc.org
  Address: mhonarc@ncsa.uiuc.edu
  Address: mhonarc@rosat.mpe-garching.mpg.de

  ##  This definition defines a list that receives CVS commits and those
  ##  commits should be separated into a special archive as to not
  ##  pollute the discussion messages with cvs commit messages
  Name: mhonarc-dev
  Description: MHonArc Development Mailing List
  Address: mhonarc-dev@mhonarc.org
  CVS-Commits: 1
  CVS-Subject-Prefix: CVS: 

=head1 OPTIONS

=over

=item C<-catch-archive> I<name>

The name of the I<catch> archive.  The I<catch> archive collects
all messages that do not match any mailing list rules.  If this
option is not specified, the C<CATCH_ARCHIVE> variable in
C<config.sh> is used, else the name "C<.catch>" is used.

B<Note:> If you use this option, it is recommended that the name
starts with a '.'.  This insures that no search index built for
the archive (a saving in resources) and it will not be listed
in a HTTP (Apache) directory listing.

=item C<-disable-catch-archive>

If specified, no I<catch> archive will be defined.

B<Note:> Care should be used when using this option since any
message that does not match a normal rule will be lost.

=item C<-final-dest> I<mailbox>

The destination of messages that make it to the end of the procmailrc.
In normal operations, it should be okay to discard messages at the
end since it should have matched one of the list rules or have been
caught by the catch archive (see C<-catch-archive>).

It is normal for messages to make it to the end since the list matching
rules create copies of the message during filtering.  This typically
will be apparent by the "C<Folder: /dev/null>" destinations in the
procmail log.  Message copying is done inorder to properly archive a
message that has been cross-posted to multiple lists that are being
archived.  Exceptions to the message copying are for lists with the
C<Final> option set to 1, for CVS commit archives, or for messages
that are captured by the catch archive.

This option is generally only set for debugging purposes.

If C<-final-dest> is not specified, the
C<FINAL_MSG_DESTINATION> variable in C<config.sh> is used, else
C</dev/null> is used.

=item C<-help>

Print out usage information.

=item C<-home> I<pathname>

Root pathname of archiving software and data.  If not specified,
C<SW_ROOT> variable in C<config.sh> is used, else the parent directory
that contains this program is used.

=item C<-man>

Print out entire manpage.

=item C<-mbox-dir> I<pathname>

Root pathname containing raw mailbox archives.  If not specified,
C<MBOX_DIR> variable in C<config.sh> is used, else C<I<-home>/mbox>
is used.

=item C<-msgid-cache-size> I<number-of-bytes>

The maximum size, in bytes, of the message-id cache.  The message-id
cache is used to avoid processing duplicate messages.

If this option is not specified, the C<MSGID_CACHE_SIZE> variable
in C<config.sh> is used, else 16384 will be used.

=item C<-out> I<pathname>

Output filename.  If this option is not specified, the C<PROCMAILRC>
variable in C<config.sh> is used, else C<I<-home>/procmailrc.mharc>
is used.

If "-" is the I<pathname>, then the procmailrc will be dumped to
standard out.

=item C<-procmail-path> I<pathname-list>

The search path to be used by procmail.  I.e. The value to give
the C<PATH> variable in the procmailrc.  If this option is
not specified, the C<PROCMAIL_PATH> variable in C<config.sh> is used.

=back

=head1 FILES

=over

=item C<I<mharc-root>/lib/lists.def>

Mailing lists definition file.

=item C<I<mharc-root>/lib/config.sh>

Main configuration file for MHArc.

=back

=head1 VERSION

$Id: mk-procmailrc,v 1.16 2002/08/01 01:18:46 ehood Exp $

=head1 AUTHOR

Earl Hood, earl@earlhood.com

This program is part of the MHArc archiving system and comes with
ABSOLUTELY NO WARRANTY and may be copied only under the terms of
the GNU General Public License, which may be found in the MHArc
distribution.

=cut

