mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/lib mhexternal.pl,2.11,2.12 mhtxthtml.pl,2.21,2.22 rea...

2002-10-10 18:58:00
Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv15114

Modified Files:
	mhexternal.pl mhtxthtml.pl readmail.pl 
Log Message:
* Better handling of MHTML messages.  Changes allow better handling
  of Content-Location and Content-Base headers.


Index: mhexternal.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhexternal.pl,v
retrieving revision 2.11
retrieving revision 2.12
diff -C2 -r2.11 -r2.12
*** mhexternal.pl	25 Sep 2002 03:55:44 -0000	2.11
--- mhexternal.pl	11 Oct 2002 01:57:53 -0000	2.12
***************
*** 291,294 ****
--- 291,310 ----
  	}
      }
+ 
+     # Mark part filtered
+     my $cid = $fields->{'content-id'}[0]
+ 	if (defined($fields->{'content-id'}));
+     if (defined($cid)) {
+ 	$cid =~ s/[\s<>]//g;
+ 	$cid = 'cid:'.$cid;
+     } elsif (defined($fields->{'content-location'})) {
+ 	$cid = $fields->{'content-location'}[0];
+ 	$cid =~ s/['"\s]//g;
+     }
+     if (defined($cid) && defined($readmail::Cid{$cid})) {
+ 	$readmail::Cid{$cid}->{'filtered'} = 1;
+ 	$readmail::Cid{$cid}->{'uri'} = $filename;
+     }
+ 
      ($ret, $path || $filename);
  }

Index: mhtxthtml.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxthtml.pl,v
retrieving revision 2.21
retrieving revision 2.22
diff -C2 -r2.21 -r2.22
*** mhtxthtml.pl	4 Sep 2002 04:09:30 -0000	2.21
--- mhtxthtml.pl	11 Oct 2002 01:57:53 -0000	2.22
***************
*** 176,189 ****
  	$$data =~ s/(=\s*["']?\s*)\&\{/$1/g;
      }
      
!     if ($onlycid) {
! 	# quoted attributes
!         $$data =~ s/($AElem[^>]+$UAttr\s*=\s*['"])([^'"]+)(['"])
! 		   /&preserve_cid($1, $2, $3)
! 		   /geoix;
! 	# not-quoted attributes
!         $$data =~ s/($AElem[^>]+$UAttr\s*=\s*)([^'"\s>][^\s>]*)
! 		   /&preserve_cid($1, $2, "")
! 		   /geoix;
      }
  
--- 176,190 ----
  	$$data =~ s/(=\s*["']?\s*)\&\{/$1/g;
      }
+ 
+     ## Modify relative urls to absolute using BASE
+     if ($base =~ /\S/) {
+         $$data =~ s/($UAttr\s*=\s*['"])([^'"]+)(['"])/
+ 		   join("", $1, &addbase($base,$2), $3)/geoix;
+     }
      
!     ## Check for frames: Do not support, so just show source
!     if ($$data =~ m/<frameset\b/i) {
! 	$$data = join('', '<pre>', mhonarc::htmlize($$data), '</pre>');
! 	return ($title.$$data, @files);
      }
  
***************
*** 208,217 ****
  		     if $attr{'bgcolor'};
  	    if ($attr{'background'}) {
! 		if ($attr{'background'} =~ /^cid:/i) {
! 		    $attr{'background'} = &resolve_cid($attr{'background'});
! 		} else {
! 		    $attr{'background'} = &addbase($base, $attr{'background'});
  		}
- 		$tpre .= qq|background-image: url($attr{'background'}) |;
  	    }
  	    $tpre .= qq|color: $attr{'text'}; |
--- 209,216 ----
  		     if $attr{'bgcolor'};
  	    if ($attr{'background'}) {
! 		if ($attr{'background'} =
! 			&resolve_cid($onlycid, $attr{'background'})) {
! 		    $tpre .= qq|background-image: url($attr{'background'}) |;
  		}
  	    }
  	    $tpre .= qq|color: $attr{'text'}; |
***************
*** 234,248 ****
      $$data =~ s|</?body[^>]*>||ig;
  
-     ## Modify relative urls to absolute using BASE
-     if ($base =~ /\S/) {
-         $$data =~ s/($UAttr\s*=\s*['"])([^'"]+)(['"])/
- 		   join("", $1, &addbase($base,$2), $3)/geoix;
-     }
- 
      ## Check for CID URLs (multipart/related HTML)
      $$data =~ s/($UAttr\s*=\s*['"])([^'"]+)(['"])/
! 	       join("", $1, &resolve_cid($2), $3)/geoix;
      $$data =~ s/($UAttr\s*=\s*)([^'">][^\s>]+)/
! 	       join("", $1, '"', &resolve_cid($2), '"')/geoix;
  
      ($title.$$data, @files);
--- 233,241 ----
      $$data =~ s|</?body[^>]*>||ig;
  
      ## Check for CID URLs (multipart/related HTML)
      $$data =~ s/($UAttr\s*=\s*['"])([^'"]+)(['"])/
! 	       join("", $1, &resolve_cid($onlycid, $2), $3)/geoix;
      $$data =~ s/($UAttr\s*=\s*)([^'">][^\s>]+)/
! 	       join("", $1, '"', &resolve_cid($onlycid, $2), '"')/geoix;
  
      ($title.$$data, @files);
***************
*** 285,291 ****
  
  sub resolve_cid {
      my $cid = shift;
      my $href = $readmail::Cid{$cid};
!     if (!defined($href)) { return ($cid =~ /^cid:/i)? "": $cid; }
  
      require 'mhmimetypes.pl';
--- 278,298 ----
  
  sub resolve_cid {
+     my $onlycid = shift;
      my $cid = shift;
      my $href = $readmail::Cid{$cid};
!     if (!defined($href)) {
! 	my $basename = $cid;
! 	$basename =~ s/.*\///;
! 	if (!defined($href = $readmail::Cid{$basename})) {
! 	    return ""  if $onlycid;
! 	    return ($cid =~ /^cid:/i)? "": $cid;
! 	}
! 	$cid = $basename;
!     }
! 
!     if ($href->{'uri'}) {
! 	# Part already converted; multiple references to part
! 	return $href->{'uri'};
!     }
  
      require 'mhmimetypes.pl';
***************
*** 304,323 ****
      }
      $href->{'filtered'} = 1; # mark part filtered for readmail.pl
      push(@files, $filename); # @files defined in filter!!
      $filename;
  }
  
- ##---------------------------------------------------------------------------
- 
- sub preserve_cid {
-     my $pre = shift;
-     my $url = shift;
-     my $post = shift;
-     if ($url =~ /^cid:/i) {
- 	$pre . $url . $post;
-     } else {
- 	$pre . 'javascript:void(0);' . $post;
-     }
- }
  ##---------------------------------------------------------------------------
  
--- 311,320 ----
      }
      $href->{'filtered'} = 1; # mark part filtered for readmail.pl
+     $href->{'uri'}      = $filename;
+ 
      push(@files, $filename); # @files defined in filter!!
      $filename;
  }
  
  ##---------------------------------------------------------------------------
  

Index: readmail.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/readmail.pl,v
retrieving revision 2.19
retrieving revision 2.20
diff -C2 -r2.19 -r2.20
*** readmail.pl	10 Oct 2002 22:21:32 -0000	2.19
--- readmail.pl	11 Oct 2002 01:57:53 -0000	2.20
***************
*** 48,51 ****
--- 48,53 ----
  ###############################################################################
  
+ my $Url	          = '(\w+://|\w+:)';
+ 
  my @_MIMEAltPrefs = ();
  my %_MIMEAltPrefs = ();
***************
*** 436,440 ****
  
      my($type, $subtype, $boundary, $content, $ctype, $pos,
!        $encoding, $decodefunc, $args, $part);
      my(@parts) = ();
      my(@files) = ();
--- 438,442 ----
  
      my($type, $subtype, $boundary, $content, $ctype, $pos,
!        $encoding, $decodefunc, $args, $part, $uribase);
      my(@parts) = ();
      my(@files) = ();
***************
*** 463,466 ****
--- 465,478 ----
      }
  
+     ## Get entity URI base
+     if (defined($fields->{'content-base'}) &&
+ 	    ($uribase = $fields->{'content-base'}[0])) {
+ 	$uribase =~ s/['"\s]//g;
+     } elsif (defined($fields->{'content-location'}) &&
+ 		($uribase = $fields->{'content-location'}[0])) {
+ 	$uribase =~ s/['"\s]//g;
+     }
+     $uribase =~ s|(.*/).*|$1|  if $uribase;
+ 
      ## Load content-type filter
      if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) &&
***************
*** 613,624 ****
  			$Cid{"cid:$cid"} = $href  if $cid =~ /\S/;
  		    }
  		    if (defined($partfields->{'content-location'}) &&
  			    ($cid = $partfields->{'content-location'}[0])) {
! 			$cid =~ s/^\s+//;
! 			$cid =~ s/\s+$//;
  			if ($cid =~ /\S/ && !$Cid{$cid}) {
  			    $Cid{$cid} = $href;
  			}
  		    }
  		}
  	    }
--- 625,646 ----
  			$Cid{"cid:$cid"} = $href  if $cid =~ /\S/;
  		    }
+ 		    $cid = undef;
  		    if (defined($partfields->{'content-location'}) &&
  			    ($cid = $partfields->{'content-location'}[0])) {
! 			my $partbase = $uribase;
! 			$cid =~ s/['"\s]//g;
! 			if (defined($partfields->{'content-base'})) {
! 			    $partbase = $partfields->{'content-base'}[0];
! 			}
! 			$cid = apply_base_url($partbase, $cid);
  			if ($cid =~ /\S/ && !$Cid{$cid}) {
  			    $Cid{$cid} = $href;
  			}
  		    }
+ 		    if ($cid) {
+ 			$partfields->{'content-location'} = [ $cid ];
+ 		    } elsif (!defined($partfields->{'content-base'})) {
+ 			$partfields->{'content-base'} = [ $uribase ];
+ 		    }
  		}
  	    }
***************
*** 1105,1108 ****
--- 1127,1163 ----
      $_[0] =~ m|^\s*([\w\-\./]+)|;
      lc($1);
+ }
+ 
+ ##---------------------------------------------------------------------------##
+ 
+ sub apply_base_url {
+     my($b, $u) = @_;
+     return $u  if !defined($b) || $b !~ /\S/;
+ 
+     my($ret);
+     $u =~ s/^\s+//;
+     if ($u =~ m%^$Url%o || $u =~ m/^#/) {
+ 	## Absolute URL or scroll link; do nothing
+         $ret = $u;
+     } else {
+ 	## Relative URL
+ 	if ($u =~ /^\./) {
+ 	    ## "./---" or "../---": Need to remove and adjust base
+ 	    ## accordingly.
+ 	    $b =~ s/\/$//;
+ 	    my @a = split(/\//, $b);
+ 	    my $cnt = 0;
+ 	    while ( $cnt <= scalar(@a) &&
+ 		    $u =~ s|^(\.{1,2})/|| ) { ++$cnt  if length($1) == 2; }
+ 	    splice(@a, -$cnt)  if $cnt > 0;
+ 	    $b = join('/', @a, "");
+ 
+ 	} elsif ($u =~ m%^/%) {
+ 	    ## "/---": Just use hostname:port of base.
+ 	    $b =~ s%^(${Url}[^/]*)/.*%$1%o;
+ 	}
+         $ret = $b . $u;
+     }
+     $ret;
  }
  

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV