# # -*- Perl -*- # $Id: mhonarc.pl,v 1.24 2001/09/15 00:45:30 kenji Exp $ # Copyright (C) 1997-2000 Satoru Takabayashi , # 1999 NOKUBI Takatsugu, # 2002 Earl Hood # All rights reserved. # This is free software with ABSOLUTELY NO WARRANTY. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either versions 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA # # This file must be encoded in EUC-JP encoding # package mhonarc; use strict; require 'util.pl'; require 'gfilter.pl'; require 'html.pl'; require 'mailnews.pl'; # # This pattern specifies MHonArc's file names. # NOTE: MHonArc does allow one to customize the filename for message files. # So we make the regex a little flexible to handle common variations. # my $MHONARC_MESSAGE_FILE = '\bmsg\d+\.s?html(?:\.gz)?\Z'; sub mediatype() { return ('text/html; x-type=mhonarc'); } sub status() { return 'yes'; } sub recursive() { return 0; } sub pre_codeconv() { return 1; } sub post_codeconv () { return 0; } sub add_magic ($) { my ($magic) = @_; $magic->addMagicEntry('0 string \'); if ($pos > 0) { load_mhonarc_fields($mha_fields, $weighted_str, substr($$contref, 0, $pos)); } # Strip off front-matter $pos = index($$contref, ''); substr($$contref, 0, $pos + length('')) = ""; # Strip off end-matter $pos = index($$contref, ''); substr($$contref, $pos) = ""; # Extract message header for separate processing, will be added back my $msg_header = ""; $pos = index($$contref, ''); if ($pos >= 0) { $msg_header = substr($$contref, 0, $pos); substr($$contref, 0, $pos) = ""; } # Strip out stuff between header and body $pos = index($$contref, ''); substr($$contref, 0, $pos + length('')) = ""; # Reformat header to make it nice for mailnews filter if ($msg_header ne "") { $msg_header =~ s/\A\s+//; html::remove_html_elements(\$msg_header); $msg_header =~ s/^\s*([\w\-_]+:)/$1/gm; $msg_header =~ s/^([\w\-_]+:)(?:[^\n\S]*\n[^\n\S]*)+/$1 /gm; } # Format MHonArc X comment extracted headers as regular headers my $mha_header = ""; my($fld_name, $fld_value); while (($fld_name, $fld_value) = each %$mha_fields) { $mha_header .= join('', $fld_name, ': ', $fld_value, "\n"); } # Added header back to content string. $$contref = $mha_header . $msg_header . "\n" . $$contref; # Return extract MHonArc fields #$mha_fields; } sub load_mhonarc_fields { my $fields = shift; my $weighted_str = shift; my $mha_head = shift; if ($mha_head =~ //) { my $subject = uncommentize($1); 1 while ($subject =~ s/\A\s*(re|sv|fwd|fw)[\[\]\d]*[:>-]+\s*//i); $subject =~ s/\A\s*\[[^\]]+\]\s*//; $fields->{'subject'} = $subject; } if ($mha_head =~ //) { $fields->{'from'} = mrot13(uncommentize($1)); } elsif ($mha_head =~ //) { $fields->{'from'} = uncommentize($1); } if ($mha_head =~ //) { $fields->{'message-id'} = '<' . uncommentize($1). '>'; } if ($mha_head =~ //) { $fields->{'date'} = uncommentize($1); } } sub uncommentize { my($txt) = $_[0]; $txt =~ s/&#(\d+);/pack("C",$1)/ge; $txt; } sub mrot13 { my $str = shift; $str =~ tr/@A-Z[a-z/N-Z[(_at_)A-Mn-za-m/; $str; } 1;