Update of mhonarc/MHonArc/lib
Modified Files:
mhtxthtml.pl
Log Message:
Bug #32013, #32014: Moved comment stripping to top of function, using
a new comment stripping function that does not use regex. Comment
stripping done first to avoid a false positives in invalid nested
tag check if a '<' exists in a comment declaration.
======================================================================
FILE: mhonarc/MHonArc/lib/mhtxthtml.pl
<http://www.mhonarc.org/cgi-bin/viewcvs.cgi/*checkout*/mhonarc/MHonArc/lib/mhtxthtml.pl?rev=2.40>
<http://www.mhonarc.org/cgi-bin/viewcvs.cgi/mhonarc/MHonArc/lib/mhtxthtml.pl.diff?r1=2.39&r2=2.40&diff_format=h>
--- mhtxthtml.pl 31 Dec 2010 20:34:00 -0000 2.39
+++ mhtxthtml.pl 1 Jan 2011 22:30:40 -0000 2.40
@@ -122,5 +122,13 @@
# Bug-32013 (CVE-2010-4524): Invalid tags cause immediate rejection.
# Bug-32014 (CVE-2010-1677): Prevents DoS if massively nested.
+ my $allowcom = $args =~ /\ballowcomments\b/i;
+ strip_comments($fields, $data) unless $allowcom;
if ($$data =~ /<[^>]*</) {
+ # XXX: This will reject HTML that includes a '<' char in a
+ # comment declaration. Unsure it is worth the hassle
+ # to deal with it. Such scenarios would normally indicate
+ # hand generated HTML vs how most HTML email is generated.
+ # Plus, allowcomments should not be enabled, so they get
+ # removed above.
warn qq/\n/,
qq/Warning: Invalid HTML detected, rejecting\n/,
@@ -153,5 +161,4 @@
my $subdir = $args =~ /\bsubdir\b/i;
my $norelate = $args =~ /\bdisablerelated\b/i;
- my $allowcom = $args =~ /\ballowcomments\b/i;
my $atdir = $subdir ? $mhonarc::MsgPrefix.$mhonarc::MHAmsgnum : "";
my $tmp;
@@ -361,10 +368,11 @@
}
+ ## NOTE: Comment strip moved to top.
## Check comment declarations: may screw-up mhonarc processing
## and avoids someone sneaking in SSIs.
- if (!$allowcom) {
- #$$data =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # can crash perl
- $$data =~ s/<!--[^-]+[#X%\$\[]*/<!--/g; # Just mung them (faster)
- }
+# if (!$allowcom) {
+# #$$data =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # can crash perl
+# $$data =~ s/<!--[^-]+[#X%\$\[]*/<!--/g; # Just mung them (faster)
+# }
## Prevent comment spam
@@ -468,3 +476,44 @@
##---------------------------------------------------------------------------
+sub strip_comments {
+ my $fields = shift; # for diagnostics
+ my $data = shift; # ref to text to strip
+
+ # We avoid using regex since it can lead to performance problems.
+ # We also do not do full SGML-style comment declarations since it
+ # increases parsing complexity. Here, we just remove any
+ # "<!-- ... -->" strings. Although whitespace is allowed between
+ # final "--" and ">", we do not support it.
+
+ my $n = index($$data, '<!--', 0);
+ if ($n < 0) {
+ # Nothing to do. Good.
+ return $data;
+ }
+
+ my $ret = '';
+ while ($n >= 0) {
+ $ret .= substr($$data, 0, $n);
+ substr($$data, 0, $n) = '';
+ $n = index($$data, '-->', 0);
+ if ($n < 0) {
+ # No end to comment declaration: Warn and strip rest of data.
+ warn qq/\n/,
+ qq/Warning: HTML comment declaration not terminated.\n/,
+ qq/ Message-Id: <$mhonarc::MHAmsgid>\n/,
+ qq/ Message Subject: /, $fields->{'x-mha-subject'}, qq/\n/,
+ qq/ Message Number: $mhonarc::MHAmsgnum\n/;
+ $$data = '';
+ last;
+ }
+ substr($$data, 0, $n+3) = '';
+ $n = index($$data, '<!--', 0);
+ }
+ $ret .= $$data;
+ $$data = $ret;
+ $data;
+}
+
+##---------------------------------------------------------------------------
+
1;
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-COMMITS