mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/lib mhtxthtml.pl,2.39,2.40

2011-01-01 16:30:44
Update of mhonarc/MHonArc/lib
Modified Files:
	mhtxthtml.pl 
Log Message:
Bug #32013, #32014: Moved comment stripping to top of function, using
a new comment stripping function that does not use regex.  Comment
stripping done first to avoid a false positives in invalid nested
tag check if a '<' exists in a comment declaration.


======================================================================
FILE: mhonarc/MHonArc/lib/mhtxthtml.pl
<http://www.mhonarc.org/cgi-bin/viewcvs.cgi/*checkout*/mhonarc/MHonArc/lib/mhtxthtml.pl?rev=2.40>

<http://www.mhonarc.org/cgi-bin/viewcvs.cgi/mhonarc/MHonArc/lib/mhtxthtml.pl.diff?r1=2.39&r2=2.40&diff_format=h>
--- mhtxthtml.pl	31 Dec 2010 20:34:00 -0000	2.39
+++ mhtxthtml.pl	1 Jan 2011 22:30:40 -0000	2.40
@@ -122,5 +122,13 @@
     # Bug-32013 (CVE-2010-4524): Invalid tags cause immediate rejection.
     # Bug-32014 (CVE-2010-1677): Prevents DoS if massively nested.
+    my $allowcom = $args =~ /\ballowcomments\b/i;
+    strip_comments($fields, $data)  unless $allowcom;
     if ($$data =~ /<[^>]*</) {
+      # XXX: This will reject HTML that includes a '<' char in a
+      #      comment declaration.  Unsure it is worth the hassle
+      #      to deal with it.  Such scenarios would normally indicate
+      #      hand generated HTML vs how most HTML email is generated.
+      #      Plus, allowcomments should not be enabled, so they get
+      #      removed above.
       warn qq/\n/,
            qq/Warning: Invalid HTML detected, rejecting\n/,
@@ -153,5 +161,4 @@
     my $subdir   = $args =~ /\bsubdir\b/i;
     my $norelate = $args =~ /\bdisablerelated\b/i;
-    my $allowcom = $args =~ /\ballowcomments\b/i;
     my $atdir    = $subdir ? $mhonarc::MsgPrefix.$mhonarc::MHAmsgnum : "";
     my $tmp;
@@ -361,10 +368,11 @@
     }
 
+    ## NOTE: Comment strip moved to top.
     ## Check comment declarations: may screw-up mhonarc processing
     ## and avoids someone sneaking in SSIs.
-    if (!$allowcom) {
-      #$$data =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # can crash perl
-      $$data =~ s/<!--[^-]+[#X%\$\[]*/<!--/g;  # Just mung them (faster)
-    }
+#   if (!$allowcom) {
+#     #$$data =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # can crash perl
+#     $$data =~ s/<!--[^-]+[#X%\$\[]*/<!--/g;  # Just mung them (faster)
+#   }
 
     ## Prevent comment spam
@@ -468,3 +476,44 @@
 ##---------------------------------------------------------------------------
 
+sub strip_comments {
+  my $fields = shift;    # for diagnostics
+  my $data = shift;      # ref to text to strip
+
+  # We avoid using regex since it can lead to performance problems.
+  # We also do not do full SGML-style comment declarations since it
+  # increases parsing complexity.  Here, we just remove any
+  # "<!-- ... -->" strings.  Although whitespace is allowed between
+  # final "--" and ">", we do not support it.
+  
+  my $n = index($$data, '<!--', 0);
+  if ($n < 0) {
+    # Nothing to do.  Good.
+    return $data;
+  }
+
+  my $ret = '';
+  while ($n >= 0) {
+    $ret .= substr($$data, 0, $n);
+    substr($$data, 0, $n) = '';
+    $n = index($$data, '-->', 0);
+    if ($n < 0) {
+      # No end to comment declaration: Warn and strip rest of data.
+      warn qq/\n/,
+           qq/Warning: HTML comment declaration not terminated.\n/,
+           qq/         Message-Id: <$mhonarc::MHAmsgid>\n/,
+           qq/         Message Subject: /, $fields->{'x-mha-subject'}, qq/\n/,
+           qq/         Message Number: $mhonarc::MHAmsgnum\n/;
+      $$data = '';
+      last;
+    }
+    substr($$data, 0, $n+3) = '';
+    $n = index($$data, '<!--', 0);
+  }
+  $ret .= $$data;
+  $$data = $ret;
+  $data;
+}
+
+##---------------------------------------------------------------------------
+
 1;

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-COMMITS