mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/lib mhtxtplain.pl,2.31,2.32

2002-12-04 12:30:10
Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv20990/lib

Modified Files:
	mhtxtplain.pl 
Log Message:
* Changed code for flowed and fancy to avoid perl crashing on very
  large messages.  Crashes occured for perl v5.6.1 and v5.8.0 under
  linux.  Assuming that crash is not unique to linux.  Checking
  with gdb, it appears perl gets caught in a very deep recursive
  loop.
  Also, new code may actually be faster, but no testing done to
  confirm.


Index: mhtxtplain.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxtplain.pl,v
retrieving revision 2.31
retrieving revision 2.32
diff -C2 -r2.31 -r2.32
*** mhtxtplain.pl	23 Nov 2002 04:10:40 -0000	2.31
--- mhtxtplain.pl	4 Dec 2002 19:29:54 -0000	2.32
***************
*** 345,426 ****
      }
  
!     if ($quote_style == Q_FLOWED) {
! 	# Initial code for format=flowed contributed by Ken Hirsch (May 2002).
! 	# text/plain; format=flowed defined in RFC2646
  
  	my $currdepth = 0;
  	my $ret='';
  	$$data =~ s!^</?x-flowed>\n!!mg;
  	while (length($$data) > 0) {
! 	    my($qd) = $$data =~ /^((?:&gt;)*)/;
! 	    if ($$data =~ s/^(.*(?:\n$qd(?!&gt;).*)*\n?)//) {
! 		# divide message into chunks by "quote-depth",
! 		# which is the number of leading > signs
! 		my $chunk = $1;
! 		$chunk =~ s/^$qd ?//mg;  # N.B. also takes care of
! 					 # space-stuffing
! 		$chunk =~ s/^-- $/--/mg; # special case for '-- '
! 
! 		my @paras = split(/(\n\n)/, $chunk);
! 		my $para;
! 		$chunk = '';
! 		foreach $para (@paras) {
! 		    if ($para =~ /\A\n+\Z/) {
! 			$chunk .= "<br>\n" x length($para);
! 			next;
  		    }
- 		    $para =~ s/^\n/<br>/;
- 		    my $nls = ($para =~ tr/\n/\n/);
- 		    if (($para =~ / \n/) || ($para =~ / \Z/) ||
- 			    ($nls < 1) ||
- 			    (($nls == 1) && ($para =~ /\S/)
- 			     && ($para =~ /\n\Z/))) {
- 			# flowed format
- 			$para =~ s/^(|.*[^ ])(\n)(?!\Z)/
- 				   ($keepspace ? &preserve_space($1) : $1) .
- 				   '<br>'.$2/mgex;
- 			if ($nonfixed) {
- 			    $chunk .= $para;
- 			} else {
- 			    $chunk .= '<tt>'.$para.'</tt>';
- 			}
  
! 		    } else {
! 			# fixed format
! 			$para =~ s/^(.*)$
! 				  /&break_line($1,
! 				      $maxwidth+(length($1)-&html_length($1)))
! 				  /gemx
! 			    if $maxwidth > 0;
! 			if ($nonfixed) {
! 			    $para =~ s/(\n)/<br>$1/g;
! 			    if ($keepspace) {
! 				$para =~ s/^(.*)$/&preserve_space($1)/gem;
! 			    }
! 			    $chunk .= $para;
! 			} else {
! 			    $chunk .= $startfixq . $para . $endfixq;
  			}
  		    }
  		}
  
! 		my $newdepth = length($qd)/length('&gt;');
! 		if ($currdepth < $newdepth) {
! 		    $chunk = $startq x ($newdepth - $currdepth) . $chunk;
! 		} elsif ($currdepth > $newdepth) {
! 		    $chunk = $endq   x ($currdepth - $newdepth) . $chunk;
! 		}
! 		$currdepth = $newdepth;
! 		$ret .= $chunk;
! 
! 	    } else {
! 		# The above regex should always match, but just in case...
! 		warn qq/\n/,
! 		     qq/Warning: Dequoting problem with format=flowed data\n/,
! 		     qq/         Message-Id: <$MHAmsgid>\n/,
! 		     qq/         Message Number: $MHAmsgnum\n/;
! 		$ret .= '<pre>' . $$data . '</pre>';
! 		last;
  	    }
  	}
  	if ($currdepth > 0) {
--- 345,447 ----
      }
  
!     # XXX: Initial algorithms for flowed and fancy processing
!     # used the s/// operator.  However, for large messages, this could
!     # cause perl to crash (seg fault) (verified with perl v5.6.1 and
!     # v5.8.0).  Hence, code changed to use m//g and substr(), which
!     # appears to avoid perl crashing (ehood, Dec 2002).
!     #
!     # Initial code for format=flowed contributed by Ken Hirsch (May 2002).
!     # text/plain; format=flowed defined in RFC2646
  
+     if ($quote_style == Q_FLOWED) {
+ 	my($chunk, $qd, $offset);
  	my $currdepth = 0;
  	my $ret='';
  	$$data =~ s!^</?x-flowed>\n!!mg;
  	while (length($$data) > 0) {
! 	    # Divide message into chunks by "quote-depth",
! 	    # which is the number of leading > signs
! 	    ($qd) = $$data =~ /^((?:&gt;)*)/;
! 	    $chunk = '';
! 	    pos($$data) = 0;
! 	    if ($qd eq '') {
! 		# Non-quoted text: We special case this since we can
! 		# use a fixed pattern to grab the chunk.
! 		if ($$data =~ /^(?=&gt;)/mgo) {
! 		    $offset = pos($$data);
! 		    $chunk = substr($$data, 0, $offset);
! 		    substr($$data, 0, $offset) = '';
! 		} else {
! 		    $chunk = $$data;
! 		    $$data = '';
! 		}
! 	    } else {
! 		# Quoted text: It would be nice to not have
! 		# to compile a new pattern each time.
! 		if ($$data =~ /^(?!$qd(?!&gt;))/mg) {
! 		    $offset = pos($$data);
! 		    $chunk = substr($$data, 0, $offset);
! 		    substr($$data, 0, $offset) = '';
! 		} else {
! 		    $chunk = $$data;
! 		    $$data = '';
! 		}
! 		$chunk =~ s/^$qd_re//mg;
! 	    }
! 	    $chunk =~ s/^$qd ?//mg;  # N.B. also takes care of
! 				     # space-stuffing
! 	    $chunk =~ s/^-- $/--/mg; # special case for '-- '
! 
! 	    my @paras = split(/(\n\n)/, $chunk);
! 	    my $para;
! 	    $chunk = '';
! 	    foreach $para (@paras) {
! 		if ($para =~ /\A\n+\Z/) {
! 		    $chunk .= "<br>\n" x length($para);
! 		    next;
! 		}
! 		$para =~ s/^\n/<br>/;
! 		my $nls = ($para =~ tr/\n/\n/);
! 		if (($para =~ / \n/) || ($para =~ / \Z/) ||
! 			($nls < 1) ||
! 			(($nls == 1) && ($para =~ /\S/)
! 			 && ($para =~ /\n\Z/))) {
! 		    # flowed format
! 		    $para =~ s/^(|.*[^ ])(\n)(?!\Z)/
! 			       ($keepspace ? &preserve_space($1) : $1) .
! 			       '<br>'.$2/mgex;
! 		    if ($nonfixed) {
! 			$chunk .= $para;
! 		    } else {
! 			$chunk .= '<tt>'.$para.'</tt>';
  		    }
  
! 		} else {
! 		    # fixed format
! 		    $para =~ s/^(.*)$
! 			      /&break_line($1,
! 				  $maxwidth+(length($1)-&html_length($1)))
! 			      /gemx
! 			if $maxwidth > 0;
! 		    if ($nonfixed) {
! 			$para =~ s/(\n)/<br>$1/g;
! 			if ($keepspace) {
! 			    $para =~ s/^(.*)$/&preserve_space($1)/gem;
  			}
+ 			$chunk .= $para;
+ 		    } else {
+ 			$chunk .= $startfixq . $para . $endfixq;
  		    }
  		}
+ 	    }
  
! 	    my $newdepth = length($qd)/length('&gt;');
! 	    if ($currdepth < $newdepth) {
! 		$chunk = $startq x ($newdepth - $currdepth) . $chunk;
! 	    } elsif ($currdepth > $newdepth) {
! 		$chunk = $endq   x ($currdepth - $newdepth) . $chunk;
  	    }
+ 	    $currdepth = $newdepth;
+ 	    $ret .= $chunk;
  	}
  	if ($currdepth > 0) {
***************
*** 434,476 ****
  
      } elsif ($quote_style == Q_FANCY) {
  	my $currdepth = 0;
  	my $ret='';
  	while (length($$data) > 0) {
! 	    my($qd) = $$data =~ /^((?:[ ]?(?:${HQuoteChars})[ ]?)*)/o;
! 	    my($qd_re) = "\Q$qd\E";
! 	       $qd_re  =~ s/\\ /[ ]?/g;
! 	    if ($$data =~ s/\A(.*(?:\n$qd_re
! 			    (?![ ]?(?:${HQuoteChars}[ ]?)).*)*\n?)//x) {
! 		my $chunk = $1;
! 		$chunk =~ s/^$qd_re//mg;
! 		if ($nonfixed) {
! 		    $chunk =~ s/(\n)/<br>$1/g;
! 		    if ($keepspace) {
! 			$chunk =~ s/^(.*)$/&preserve_space($1)/gem;
! 		    }
  		} else {
! 		    $chunk =~ s/\n/<br\n>/g;
! 		    $chunk = $startfixq . $chunk . $endfixq;
  		}
! 
! 		$qd =~ s/\s+//g;
! 		my $newdepth = html_length($qd);
! 		if ($currdepth < $newdepth) {
! 		    $chunk = $startq x ($newdepth - $currdepth) . $chunk;
! 		} elsif ($currdepth > $newdepth) {
! 		    $chunk = $endq   x ($currdepth - $newdepth) . $chunk;
  		}
- 		$currdepth = $newdepth;
- 		$ret .= $chunk;
- 
  	    } else {
! 		# The above regex should always match, but just in case...
! 		warn qq/\n/,
! 		     qq/Warning: Dequoting problem with entity data\n/,
! 		     qq/         Message-Id: <$MHAmsgid>\n/,
! 		     qq/         Message Number: $MHAmsgnum\n/;
! 		$ret .= '<pre>' . $$data . '</pre>';
! 		last;
  	    }
  	}
  	if ($currdepth > 0) {
--- 455,518 ----
  
      } elsif ($quote_style == Q_FANCY) {
+ 	# Fancy code very similiar to flowed code, but simplier.
+ 	# Some stuff is "duplicated", but this is written to use
+ 	# ${HQuoteChars}, which would allow for alternate
+ 	# quote characters beyond '>'.
+ 	my($chunk, $qd, $qd_re, $offset);
  	my $currdepth = 0;
  	my $ret='';
+ 
+ 	# Compress '>'s to have not spacing, makes latter patterns
+ 	# simplier.
+ 	$$data =~ s/(?:^|\G(${HQuoteChars}))[ ]?/$1/gm;
  	while (length($$data) > 0) {
! 	    ($qd) = $$data =~ /\A((?:${HQuoteChars})*)/o;
! 	    $chunk = '';
! 	    pos($$data) = 0;
! 	    if ($qd eq '') {
! 		# Non-quoted text: We special case this since we can
! 		# use a fixed pattern to grab the chunk.
! 		if ($$data =~ /^(?=${HQuoteChars})/mgo) {
! 		    $offset = pos($$data);
! 		    $chunk = substr($$data, 0, $offset);
! 		    substr($$data, 0, $offset) = '';
  		} else {
! 		    $chunk = $$data;
! 		    $$data = '';
  		}
! 	    } else {
! 		# Quoted text: Make sure any regex specials are escaped
! 		# before using in pattern.  It would be nice to not have
! 		# to compile a new pattern each time.
! 		$qd_re = "\Q$qd\E";
! 		if ($$data =~ /^(?!$qd_re(?!${HQuoteChars}))/mg) {
! 		    $offset = pos($$data);
! 		    $chunk = substr($$data, 0, $offset);
! 		    substr($$data, 0, $offset) = '';
! 		} else {
! 		    $chunk = $$data;
! 		    $$data = '';
! 		}
! 		$chunk =~ s/^$qd_re//mg;
! 	    }
! 	    if ($nonfixed) {
! 		$chunk =~ s/(\n)/<br>$1/g;
! 		if ($keepspace) {
! 		    $chunk =~ s/^(.*)$/&preserve_space($1)/gem;
  		}
  	    } else {
! 		$chunk =~ s/\n/<br\n>/g;
! 		$chunk = $startfixq . $chunk . $endfixq;
! 	    }
! 
! 	    $qd =~ s/\s+//g;
! 	    my $newdepth = html_length($qd);
! 	    if ($currdepth < $newdepth) {
! 		$chunk = $startq x ($newdepth - $currdepth) . $chunk;
! 	    } elsif ($currdepth > $newdepth) {
! 		$chunk = $endq   x ($currdepth - $newdepth) . $chunk;
  	    }
+ 	    $currdepth = $newdepth;
+ 	    $ret .= $chunk;
  	}
  	if ($currdepth > 0) {

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV