#!/usr/local/bin/perl -w ##--------------------------------------------------------------------------## ## File: ## $Id: mk-procmailrc,v 1.18 2002/09/15 03:33:08 ehood Exp $ ## Description: ## Program to create a procmail recipe file from lists.txt. ## Run script with '-man' option to view manpage for this program. ##--------------------------------------------------------------------------## ## Copyright (C) 2001-2002 Earl Hood ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ## 02111-1307, USA ##--------------------------------------------------------------------------## package MHArc::mk_procmailrc; ##--------------------------------------------------------------------------## # BEGIN { die qq/CGI use FORBIDDEN!\n/ if (defined($ENV{'GATEWAY_INTERFACE'})); } my $Dir; BEGIN { $Dir = `dirname $0`; chomp $Dir; } use lib "$Dir/../lib"; # Add relative lib to search path # ##--------------------------------------------------------------------------## # use MHArc::Config; my $config = MHArc::Config->load("$Dir/../lib/config.sh"); # ##--------------------------------------------------------------------------## use Getopt::Long; use MHArc::ListDef; use MHArc::Util qw( usage ); MAIN: { my @htaccess = (); my %opt = ( ); my $clstatus = GetOptions(\%opt, 'catch-archive=s', 'disable-catch-archive!', 'final-dest=s', 'home=s', 'mbox-dir=s', 'msgid-cache-size=i', 'out=s', 'procmail-path=s', 'verbose!', 'help', 'man' ); usage(0) unless $clstatus; usage(1) if $opt{'help'}; usage(2) if $opt{'man'}; my $basedir = $opt{'home'} || $config->{'SW_ROOT'} || "$Dir/.."; my $mbox_dir = $opt{'mbox-dir'} || $config->{'MBOX_DIR'} || join('/',$basedir,'mbox'); my $out_file = $opt{'out'} || $config->{'PROCMAILRC'} || join('/', $basedir, 'procmailrc.mharc'); my $procmail_path = $opt{'procmail-path'} || $config->{'PROCMAIL_PATH'}; my $cache_size = $opt{'msgid-cache-size'} || $config->{'MSGID_CACHE_SIZE'} || 16384; my $catch_arc = $opt{'catch-archive'} || $config->{'CATCH_ARCHIVE'} || '.catch'; my $nocatch = defined($opt{'disable-catch-archive'}) ? $opt{'disable-catch-archive'} : ($config->{'DISABLE_CATCH_ARCHIVE'} || 0); my $final_dest = $opt{'final-dest'} || $config->{'FINAL_MSG_DESTINATION'} || '/dev/null'; ## Read lists definition file my $file = shift(@ARGV) || $config->{'LISTS_DEF_FILE'} || "$basedir/lib/lists.def"; my $listdef = MHArc::ListDef->new($file); my $extract_date_prg = join('/', $basedir, 'bin', 'extract-mesg-date'); local(*OUTFILE); my $outfh; if (!defined($out_file) || ($out_file eq "") || ($out_file eq '-')) { $outfh = \*STDOUT; } else { open(OUTFILE, ">$out_file") || die qq/ERROR: Unable to create "$out_file": $!\n/; $outfh = \*OUTFILE; } ## Print procmailrc header print $outfh <{$name}{'address'})) { @addr = @{$listdef->{$name}{'address'}}; } else { @addr = ( ); } if (defined($listdef->{$name}{'from-address'})) { @from_addr = @{$listdef->{$name}{'from-address'}}; } else { @from_addr = ( ) } if (!scalar(@addr) && !scalar(@from_addr)) { # no addresses defined warn qq/Warning: No addresses defined for '$name'\n/; next; } # create procmail regex for list $pm_conditions = '* ('; if (scalar(@addr)) { $pm_conditions .= '^TO_'; $pm_conditions .= '(' if (scalar(@addr) > 1); $pm_conditions .= join('|', @addr); $pm_conditions .= ')' if (scalar(@addr) > 1); $pm_conditions .= '|(^(Delivered-To:|List-Post:).*'; $pm_conditions .= '(' if (scalar(@addr) > 1); $pm_conditions .= join('|', @addr); $pm_conditions .= ')' if (scalar(@addr) > 1); $pm_conditions .= ')'; } if (scalar(@from_addr)) { $pm_conditions .= '|(' if (scalar(@addr)); $pm_conditions .= '^From:(.*[^-a-zA-Z0-9_.])?'; $pm_conditions .= '(' if (scalar(@from_addr) > 1); $pm_conditions .= join('|', @from_addr); $pm_conditions .= ')' if (scalar(@from_addr) > 1); $pm_conditions .= ')' if (scalar(@addr)); } $pm_conditions .= ')'; if (defined($listdef->{$name}{'procmail-condition'})) { foreach $str (@{$listdef->{$name}{'procmail-condition'}}) { $pm_conditions .= "\n$str"; } } # check if doing monthly or yearly archives $period = lc($listdef->{$name}{'period'}[0]) || 'month'; $period = 'month' if ($name eq $catch_arc); if ($period eq 'year') { $time_fmt = '%Y'; } else { $time_fmt = '%Y-%m'; } # check if rule should not be final if matched if ($listdef->{$name}{'final'}) { $pm_copy = ''; } else { $pm_copy = ' c'; } # check if separating out cvs commits if ($check_cvs = $listdef->{$name}{'cvs-commits'}[0]) { $cvs_prefix = $listdef->{$name}{'cvs-subject-prefix'}[0] || 'CVS commit'; foreach $addr (@addr) { print $outfh < program. The procmailrc is generated from Cmharc-rootE/lib/lists.def>. This program is typically invoked by calling C from the mharc root directory with configuration options specified in Cmharc-rootE/lib/config.sh>. =head1 LIST DEFINITION FILE The list definition file, Cmharc-rootE/lib/lists.def>, is read to generate the procmailrc used by L to filter messages into raw mail archives. The format of the file is intended to be simple and more convenient than writing the procmailrc file yourself. The basic format of the file is as follows: =over =item * Any blank links or lines starting with a C<#> are ignored. =item * Lines with the following format: Option-Name: Option-Value is an option to be read by this program. =back =head2 lists.def Supported Options =over =item Name Starts, and defines, the name of the list to be archived. This name will be used as the directory name to contain the archives of the list and the short title. Typical notation is to use the list address, but this is not required, especially if the archive is a combination of multiple lists, or it is prefered to use a more abstract name in case the list address may change in the future. =item Address Mail address of the list. This option can be specified multiple times. Multiple addresses can be use to capture the multiple addresses that represent a single list or to represent multiple lists to be archived together. =item From-Address Mail address of the list. This option is an alternative to C
for cases where the list can only be donoted by the C field of the message. Some lists have the C to be the subscriber's address instead of to the list address. This is fairly common for one-way lists like newsletters where subscribers receive list messages but cannot post to the list. This option can be specified multiple times. Multiple addresses can be use to capture the multiple addresses that represent a single list or to represent multiple lists to be archived together. =item CVS-Commits Boolean option (C<0> or C<1>) if the CVS commits to the list should be separated out into a separate archive. This option is useful for development lists that have all CVS commits of a project mailed to the list. This option allows the CVS commits to be separated out into a separate archive so it will not pollute the main archive that contains discussions. =item CVS-Subject-Prefix Option specifying the C prefix used to denote CVS commits to the list. Used if CVS-Commits is specified. =item Description Brief description of list. This will be used as the main title of archive index pages. =item Final Boolean option (C<0> or C<1>) if generated rule should be final if matched. I.e. If a message matches, further rules will not be examined. This option can be used to short-circuit messages from being archived in other archives. For example, you may want to catch messages that have been cross-posted to a special address to only be archived in the special address archive and not in the regular archives. Another example use of this option is if you use the special C "C<.catch>" (or the C<-catch-archive> setting described in L<"OPTIONS">). Using "C<.catch>" is handy for Final definitions to pre-catch messages that should not be placed in any list archive. =item No-Raw-Link Boolean option (C<0> or C<1>) if link to raw archive from period index page should be created. If set to C<1>, no link will be created. The default is to create a link to the raw archives. You may want to enable this option if your HTML archives have been customized to obscure address to prevent address harvesting. B If this is the case, make sure to change the permission of the C<$MBOX_DIR/mbox/Ilist-nameE>> directory so it is not readable by the HTTP server process so someone cannot backdoor into the raw archives. This is generally done by removing world read permission. Make sure the permissions do allow for the mharc archiving system to read and write to the directory. You could also explicitly deny access to the directory via the HTTP server configuration file. This is the recommended approach since it gives you additional protection in case the C<$MBOX_DIR/mbox/Ilist-nameE>> directory permissions are unintentionally changed to world readable. =item No-Search Boolean option (C/mbox> directory) if searching should be disabled. If set to C<1>, no search index will be created for the archive and the C<$SEARCH-FORM$> custom resource variable will be set to the empty string. =item MHonArc-Options Additional command-line options to pass to MHonArc. Command-line options are specified as they would be at the shell prompt. =item Period If archive is a yearly or monthly archive. Legal values are C or C. If Period is not defined, C is the default. =item Procmail-Condition Additional conditions to apply to base address check. The condition must be legal procmailrc syntax and should include any prefixing C<*>, C, et. al. B Care should be used when using this option, especially if C is true. When C is true, an additionaly rule is already added to check for the C setting. =back =head2 lists.def Example ## In this definition, we define multiple addresses to check. Name: mhonarc-users Description: MHonArc Users Address: mhonarc-users@mhonarc.org Address: mhonarc@ncsa.uiuc.edu Address: mhonarc@rosat.mpe-garching.mpg.de ## This definition defines a list that receives CVS commits and those ## commits should be separated into a special archive as to not ## pollute the discussion messages with cvs commit messages Name: mhonarc-dev Description: MHonArc Development Address: mhonarc-dev@mhonarc.org CVS-Commits: 1 CVS-Subject-Prefix: CVS: =head1 OPTIONS =over =item C<-catch-archive> I The name of the I archive. The I archive collects all messages that do not match any mailing list rules. If this option is not specified, the C variable in C is used, else the name "C<.catch>" is used. B If you use this option, it is recommended that the name starts with a '.'. This insures that no search index built for the archive (a saving in resources) and it will not be listed in a HTTP (Apache) directory listing. =item C<-disable-catch-archive> If specified, no I archive will be defined. B Care should be used when using this option since any message that does not match a normal rule will be lost. =item C<-final-dest> I The destination of messages that make it to the end of the procmailrc. In normal operations, it should be okay to discard messages at the end since it should have matched one of the list rules or have been caught by the catch archive (see C<-catch-archive>). It is normal for messages to make it to the end since the list matching rules create copies of the message during filtering. This typically will be apparent by the "C" destinations in the procmail log. Message copying is done inorder to properly archive a message that has been cross-posted to multiple lists that are being archived. Exceptions to the message copying are for lists with the C option set to 1, for CVS commit archives, or for messages that are captured by the catch archive. This option is generally only set for debugging purposes. If C<-final-dest> is not specified, the C variable in C is used, else C is used. =item C<-help> Print out usage information. =item C<-home> I Root pathname of archiving software and data. If not specified, C variable in C is used, else the parent directory that contains this program is used. =item C<-man> Print out entire manpage. =item C<-mbox-dir> I Root pathname containing raw mailbox archives. If not specified, C variable in C is used, else C/mbox> is used. =item C<-msgid-cache-size> I The maximum size, in bytes, of the message-id cache. The message-id cache is used to avoid processing duplicate messages. If this option is not specified, the C variable in C is used, else 16384 will be used. =item C<-out> I Output filename. If this option is not specified, the C variable in C is used, else C/procmailrc.mharc> is used. If "-" is the I, then the procmailrc will be dumped to standard out. =item C<-procmail-path> I The search path to be used by procmail. I.e. The value to give the C variable in the procmailrc. If this option is not specified, the C variable in C is used. =back =head1 FILES =over =item Cmharc-rootE/lib/lists.def> Mailing lists definition file. =item Cmharc-rootE/lib/config.sh> Main configuration file for mharc. =back =head1 VERSION $Id: mk-procmailrc,v 1.18 2002/09/15 03:33:08 ehood Exp $ =head1 AUTHOR Earl Hood, earl@earlhood.com This program is part of the mharc archiving system and comes with ABSOLUTELY NO WARRANTY and may be copied only under the terms of the GNU General Public License, which may be found in the mharc distribution. =cut