mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/lib mhfile.pl,2.7,2.8 mhmimetypes.pl,1.12,1.13 mhtxtht...

2002-11-22 21:10:53
Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv14330/lib

Modified Files:
	mhfile.pl mhmimetypes.pl mhtxthtml.pl mhtxtplain.pl 
Log Message:
* Added subdir option to mhtxtplain.pl and mhtxthtml.pl filters since
  the filters can create derived files.
* Updated creation of "subdir" directory to be resistent to symlink
  attacks.
* Javascript URLs are munged by HTML filter.  Further protection against
  XSS attacks.
* <a href>'s preserved by HTML filter, even if onlu cid: URLs allowed.
  This prevents regular hyperlinks from becoming stripped and enticing
  users to use allownoncidurls to work around this (which then opens
  up XSS vulnerabilities).  With the javascript URL munging, preserving
  <a href>'s should be safe.


Index: mhfile.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhfile.pl,v
retrieving revision 2.7
retrieving revision 2.8
diff -C2 -r2.7 -r2.8
*** mhfile.pl	20 Nov 2002 23:53:12 -0000	2.7
--- mhfile.pl	23 Nov 2002 04:10:40 -0000	2.8
***************
*** 30,33 ****
--- 30,34 ----
  use Symbol;
  use Fcntl;
+ use File::Basename;
  
  my $_have_File_Temp;
***************
*** 205,208 ****
--- 206,298 ----
  ##---------------------------------------------------------------------------##
  
+ sub dir_create {
+     my $path  = shift;
+     my $perms = shift || 0777;
+ 
+     if (!$UNIX) {
+ 	## Non-Unix OS's do not have symlinks
+ 	return  if (-e $path);
+ 	if (!mkdir($path, $perms)) {
+ 	    die qq/ERROR: Unable to create "$path": $!\n/;
+ 	}
+ 	return;
+     }
+ 
+     ## Check if $path is a symlink
+     if (-l $path) {
+ 	# symlink, try to delete
+ 	warn qq/Warning: "$path" is a symlink, will try to replace...\n/;
+ 	if (!unlink($path)) {
+ 	    die qq/ERROR: "$path" is a symlink, unable to remove: $!\n/;
+ 	}
+     } elsif (-e $path) {
+ 	die qq/ERROR: "$path" is not a directory: $!\n/  if !(-d _);
+ 	# already exists, nothing to do
+ 	return;
+     }
+ 
+     my $dirname = dirname($path);
+     my @info = stat($dirname);
+     if ($info[2] & Fcntl::S_IWGRP || $info[2] & Fcntl::S_IWOTH) {
+ 	my($i, $errstr, $tmpdir);
+ 	for ($i=0; $i < TEMP_MAX_TRIES; ++$i) {
+ 	    $tmpdir = dir_temp('dirXXXXXXXXXX', $dirname);
+ 	    if (!rename($tmpdir, $path)) {
+ 		$errstr = "$!";
+ 		rmdir($tmpdir);
+ 		if (-l $path) {
+ 		    # hmmmm, somone trying to so something malicious?
+ 		    warn qq/Warning: Possible symlink attack attempted with /,
+ 			 qq/"$path"\n/;
+ 		    die qq/ERROR: "$path" is a symlink, unable to remove: $!\n/
+ 			unless unlink $path;
+ 		} elsif (-d $path) {
+ 		    # somebody snuck in and created it
+ 		    return;
+ 		} elsif (-e _) {
+ 		    die qq/ERROR: "$path" exists, but it did not before, /,
+ 			qq/and it is not a directory!\n/;
+ 		}
+ 	    }
+ 	}
+ 	if ($i >= TEMP_MAX_TRIES) {
+ 	    die qq/ERROR: Unable to rename "$tmpdir" to "$path": $errstr\n/;
+ 	}
+ 
+     } else {
+ 	if (!mkdir($path, $perms)) {
+ 	    die qq/ERROR: Unable to create "$path": $!\n/;
+ 	}
+ 	return;
+     }
+     chmod(($perms &~ umask), $path);
+ }
+ 
+ sub dir_temp {
+     my $template = shift;
+     my $dir	 = shift || $CURDIR;
+     my($tmpdir);
+ 
+     MKTEMP: {
+ 	if ($_have_File_Temp) {
+ 	    $tmpdir =
+ 		File::Temp::tempdir($template, 'DIR' => $dir, 'CLEANUP' => 0);
+ 	    last MKTEMP;
+ 	}
+ 
+ 	my($i);
+ 	for ($i=0; $i < TEMP_MAX_TRIES; ++$i) {
+ 	    ($tmpdir = $template) =~
+ 		s/X/$TEMP_CHARS[int(rand($#TEMP_CHARS))]/ge;
+ 	    $tmpdir = join($DIRSEP, $dir, $tmpdir);
+ 	    last  if mkdir $tmpdir, 0700;
+ 	}
+ 	if ($i >= TEMP_MAX_TRIES) {
+ 	    die qq/ERROR: Unable to create temp dir "$tmpdir": $!\n/;
+ 	}
+     }
+     $tmpdir;
+ }
+ 
  sub dir_remove {
      my($file) = shift;
***************
*** 232,235 ****
--- 322,333 ----
      }
      1;
+ }
+ 
+ ##---------------------------------------------------------------------------##
+ 
+ sub rand_string {
+     my $template = shift;
+     $template =~ s/X/$TEMP_CHARS[int(rand($#TEMP_CHARS))]/ge;
+     $template;
  }
  

Index: mhmimetypes.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhmimetypes.pl,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -r1.12 -r1.13
*** mhmimetypes.pl	20 Nov 2002 23:53:12 -0000	1.12
--- mhmimetypes.pl	23 Nov 2002 04:10:40 -0000	1.13
***************
*** 295,299 ****
      if ($path) {
  	$pathname .= $DIRSEP . $path;
! 	mkdir($pathname, 0777);
      }
  
--- 295,299 ----
      if ($path) {
  	$pathname .= $DIRSEP . $path;
! 	dir_create($pathname);
      }
  

Index: mhtxthtml.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxthtml.pl,v
retrieving revision 2.24
retrieving revision 2.25
diff -C2 -r2.24 -r2.25
*** mhtxthtml.pl	31 Oct 2002 05:58:03 -0000	2.24
--- mhtxthtml.pl	23 Nov 2002 04:10:40 -0000	2.25
***************
*** 91,94 ****
--- 91,96 ----
  ##	notitle  	Do not print title.
  ##
+ ##	subdir		Place derived files in a subdirectory
+ ##
  sub filter {
      my($fields, $data, $isdecode, $args) = @_;
***************
*** 114,117 ****
--- 116,121 ----
      my $notitle	 = $args =~ /\bnotitle\b/i;
      my $onlycid  = $args !~ /\ballownoncidurls\b/i;
+     my $subdir   = $args =~ /\bsubdir\b/i;
+     my $atdir    = $subdir ? $mhonarc::MsgPrefix.$mhonarc::MHAmsgnum : "";
      my $tmp;
  
***************
*** 131,135 ****
      }
  
!     ## Get/remove BASE url
      BASEURL: {
  	if ($$data =~ s|(<base\s[^>]*>)||i) {
--- 135,140 ----
      }
  
!     ## Get/remove BASE url: The base URL may be defined in the HTML
!     ## data or defined in the entity header.
      BASEURL: {
  	if ($$data =~ s|(<base\s[^>]*>)||i) {
***************
*** 150,154 ****
      $base =~ s|(.*/).*|$1|;
  
!     ## Strip out certain elements/tags to support proper inclusion
      $$data =~ s|<!doctype\s[^>]*>||io;
      $$data =~ s|</?html\b[^>]*>||gio;
--- 155,161 ----
      $base =~ s|(.*/).*|$1|;
  
!     ## Strip out certain elements/tags to support proper inclusion:
!     ## some browsers are forgiving about dublicating header tags, but
!     ## we try to do things right.  It also help minimize XSS exploits.
      $$data =~ s|<!doctype\s[^>]*>||io;
      $$data =~ s|</?html\b[^>]*>||gio;
***************
*** 156,160 ****
      $$data =~ s|<head\s*>[\s\S]*</head\s*>||io;
  
!     ## Strip out <font> tags if requested
      if ($nofont) {
  	$$data =~ s|<style[^>]*>.*?</style\s*>||gios;
--- 163,167 ----
      $$data =~ s|<head\s*>[\s\S]*</head\s*>||io;
  
!     ## Strip out style information if requested.
      if ($nofont) {
  	$$data =~ s|<style[^>]*>.*?</style\s*>||gios;
***************
*** 165,170 ****
      }
  
!     ## Strip out scripting markup if requested
      if ($noscript) {
  	$$data =~ s|<script[^>]*>.*?</script\s*>||gios;
  	$$data =~ s|<style[^>]*>.*?</style\s*>||gios  unless $nofont;
--- 172,193 ----
      }
  
!     ## Strip out scripting markup
      if ($noscript) {
+ 	# Hopefully complete pattern to neutralize javascript:... URLs.
+ 	# The pattern is ugly because we have to handle any combination
+ 	# of regular chars and entity refs.
+ 	$$data =~ s/\b(?:j|&\#(?:0*(?:74|106)|x0*(?:4a|6a))(?:;|(?![0-9])))
+ 		      (?:a|&\#(?:0*(?:65|97)|x0*(?:41|61))(?:;|(?![0-9])))
+ 		      (?:v|&\#(?:0*(?:86|118)|x0*(?:56|76))(?:;|(?![0-9])))
+ 		      (?:a|&\#(?:0*(?:65|97)|x0*(?:41|61))(?:;|(?![0-9])))
+ 		      (?:s|&\#(?:0*(?:83|115)|x0*(?:53|73))(?:;|(?![0-9])))
+ 		      (?:c|&\#(?:0*(?:67|99)|x0*(?:43|63))(?:;|(?![0-9])))
+ 		      (?:r|&\#(?:0*(?:82|114)|x0*(?:52|72))(?:;|(?![0-9])))
+ 		      (?:i|&\#(?:0*(?:73|105)|x0*(?:49|69))(?:;|(?![0-9])))
+ 		      (?:p|&\#(?:0*(?:80|112)|x0*(?:50|70))(?:;|(?![0-9])))
+ 		      (?:t|&\#(?:0*(?:84|116)|x0*(?:54|74))(?:;|(?![0-9])))
+ 		   /_javascript_/gix;
+ 
+ 	# remove scripting elements and attributes
  	$$data =~ s|<script[^>]*>.*?</script\s*>||gios;
  	$$data =~ s|<style[^>]*>.*?</style\s*>||gios  unless $nofont;
***************
*** 213,217 ****
  	    if ($attr{'background'}) {
  		if ($attr{'background'} =
! 			&resolve_cid($onlycid, $attr{'background'})) {
  		    $tpre .= qq|background-image: url($attr{'background'}) |;
  		}
--- 236,240 ----
  	    if ($attr{'background'}) {
  		if ($attr{'background'} =
! 			&resolve_cid($onlycid, $attr{'background'}, $atdir)) {
  		    $tpre .= qq|background-image: url($attr{'background'}) |;
  		}
***************
*** 236,244 ****
      $$data =~ s|</?body[^>]*>||ig;
  
!     ## Check for CID URLs (multipart/related HTML)
!     $$data =~ s/($UAttr\s*=\s*['"])([^'"]+)(['"])/
! 	       join("", $1, &resolve_cid($onlycid, $2), $3)/geoix;
!     $$data =~ s/($UAttr\s*=\s*)([^'">][^\s>]+)/
! 	       join("", $1, '"', &resolve_cid($onlycid, $2), '"')/geoix;
  
      ($title.$$data, @files);
--- 259,303 ----
      $$data =~ s|</?body[^>]*>||ig;
  
!     my $ahref_tmp;
!     if ($onlycid) {
! 	# If only cid URLs allowed, we still try to preserve <a href> or
! 	# any hyperlinks in a document would be stripped out.
! 	# Algorithm: Replace HREF attribute string in <A>'s with a
! 	#	     random string.  We then restore HREF after CID
! 	#	     resolution.  We do not worry about javascript since
! 	#	     we neutralized it earlier.
! 	$ahref_tmp = mhonarc::rand_string('alnkXXXXXXXXXX');
! 
! 	# Make sure "href" not in rand string
! 	$ahref_tmp =~ s/href/XXXX/gi;
! 
! 	# Remove occurances of random string from input first.  This
! 	# should cause nothing to be deleted, but is done to avoid
! 	# a potential exploit attempt.
! 	$$data =~ s/\b$ahref_tmp\b//g;
! 
! 	# Replace all <a href> with <a RAND_STR>.  We make sure to
! 	# leave cid: attributes alone since they are processed later.
! 	$$data =~ s/(<a\b[^>]*)href\s*=\s*("(?!\s*cid:)[^"]+")
! 		   /$1$ahref_tmp=$2/gix;  # double-quoted delim attribute
! 	$$data =~ s/(<a\b[^>]*)href\s*=\s*('(?!\s*cid:)[^']+')
! 		   /$1$ahref_tmp=$2/gix;  # single-quoted delim attribute
! 	$$data =~ s/(<a\b[^>]*)href\s*=\s*((?!['"]?\s*cid:)[^\s>]+)
! 		   /$1$ahref_tmp=$2/gix;  # non-quoted attribute
!     }
! 
!     ## Check for CID URLs (multipart/related HTML).  Multiple expressions
!     ## exist to handle variations in how attribute values are delimited.
!     $$data =~ s/($UAttr\s*=\s*["])([^"]+)(["])
! 	       /join("",$1,&resolve_cid($onlycid, $2, $atdir),$3)/geoix;
!     $$data =~ s/($UAttr\s*=\s*['])([^']+)(['])
! 	       /join("",$1,&resolve_cid($onlycid, $2, $atdir),$3)/geoix;
!     $$data =~ s/($UAttr\s*=\s*)([^\s'">][^\s>]+)
!                /join("",$1,'"',&resolve_cid($onlycid, $2, $atdir),'"')/geoix;
! 
!     if ($onlycid) {
! 	# Restore HREF attributes of <A>'s.
! 	$$data =~ s/\b$ahref_tmp\b/href/g;
!     }
  
      ($title.$$data, @files);
***************
*** 283,286 ****
--- 342,347 ----
      my $onlycid = shift;
      my $cid = shift;
+     print STDERR "CID=$cid\n";
+     my $attachdir = shift;
      my $href = $readmail::Cid{$cid};
      if (!defined($href)) {
***************
*** 307,315 ****
  	my $data = &$decodefunc(${$href->{'body'}});
  	$filename = mhonarc::write_attachment(
! 			    $href->{'fields'}{'content-type'}[0], \$data);
      } else {
  	$filename = mhonarc::write_attachment(
  			    $href->{'fields'}{'content-type'}[0],
! 			    $href->{'body'});
      }
      $href->{'filtered'} = 1; # mark part filtered for readmail.pl
--- 368,379 ----
  	my $data = &$decodefunc(${$href->{'body'}});
  	$filename = mhonarc::write_attachment(
! 			    $href->{'fields'}{'content-type'}[0],
! 			    \$data,
! 			    $attachdir);
      } else {
  	$filename = mhonarc::write_attachment(
  			    $href->{'fields'}{'content-type'}[0],
! 			    $href->{'body'},
! 			    $attachdir);
      }
      $href->{'filtered'} = 1; # mark part filtered for readmail.pl

Index: mhtxtplain.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxtplain.pl,v
retrieving revision 2.30
retrieving revision 2.31
diff -C2 -r2.30 -r2.31
*** mhtxtplain.pl	20 Nov 2002 17:12:54 -0000	2.30
--- mhtxtplain.pl	23 Nov 2002 04:10:40 -0000	2.31
***************
*** 62,66 ****
  ##			Colon separated lists of charsets to leave as-is.
  ##			Only HTML special characters will be converted into
! ##			entities.  The default value is "us-ascii:iso-8859-1".
  ##
  ##	attachcheck	Honor attachment disposition.  By default,
--- 62,66 ----
  ##			Colon separated lists of charsets to leave as-is.
  ##			Only HTML special characters will be converted into
! ##			entities.
  ##
  ##	attachcheck	Honor attachment disposition.  By default,
***************
*** 104,107 ****
--- 104,110 ----
  ##			if fancyquote specified.  Overrides builtin style.
  ##
+ ##	subdir		Place derived files in a subdirectory (only
+ ##			applicable if uudecode is specified).
+ ##
  ##	target=name  	Set TARGET attribute for links if converting URLs
  ##			to links.  Defaults to _top.
***************
*** 141,144 ****
--- 144,149 ----
      if ($args =~ s/\buudecode\b//ig) {
  	# $args has uudecode stripped out for recursive calls
+ 	my $subdir = $args =~ /\bsubdir\b/i;
+ 	my $atdir  = $subdir ? $mhonarc::MsgPrefix.$mhonarc::MHAmsgnum : "";
  
  	# Make sure we have needed routines
***************
*** 187,191 ****
  		    push(@files,
  			 mhonarc::write_attachment(
! 			    'application/octet-stream', \$uddata, '',
  			    ($usename?$file:''), $inext));
  		    $urlfile = mhonarc::htmlize($files[$#files]);
--- 192,196 ----
  		    push(@files,
  			 mhonarc::write_attachment(
! 			    'application/octet-stream', \$uddata, $atdir,
  			    ($usename?$file:''), $inext));
  		    $urlfile = mhonarc::htmlize($files[$#files]);

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV