Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv19140
Modified Files:
mhtxthtml.pl
Log Message:
Changes to close possible XSS exploits.
Index: mhtxthtml.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxthtml.pl,v
retrieving revision 2.31
retrieving revision 2.32
diff -C2 -r2.31 -r2.32
*** mhtxthtml.pl 4 Feb 2003 23:31:20 -0000 2.31
--- mhtxthtml.pl 5 Apr 2003 23:52:20 -0000 2.32
***************
*** 36,43 ****
my $Url = '(\w+://|\w+:)';
! # Script related attributes
! my $SAttr = q/\b(?:onload|onunload|onclick|ondblclick|/.
! q/onmouse(?:down|up|over|move|out)|/.
! q/onkey(?:press|down|up)|style)\b/;
# Script/questionable related elements
my $SElem = q/\b(?:applet|base|embed|form|ilayer|input|layer|link|meta|/.
--- 36,42 ----
my $Url = '(\w+://|\w+:)';
! # Script related attributes: Basically any attribute that starts with "on"
! my $SAttr = q/\bon\w+\b/;
!
# Script/questionable related elements
my $SElem = q/\b(?:applet|base|embed|form|ilayer|input|layer|link|meta|/.
***************
*** 48,52 ****
# URL attributes
my $UAttr = q/\b(?:action|background|cite|classid|codebase|data|datasrc|/.
! q/dynsrc|for|href|longdesc|profile|src|url|usemap)\b/;
# Used to reverse the effects of CHARSETCONVERTERS
--- 47,52 ----
# URL attributes
my $UAttr = q/\b(?:action|background|cite|classid|codebase|data|datasrc|/.
! q/dynsrc|for|href|longdesc|lowsrc|profile|src|url|usemap|/.
! q/vrml)\b/;
# Used to reverse the effects of CHARSETCONVERTERS
***************
*** 103,106 ****
--- 103,115 ----
## subdir Place derived files in a subdirectory
##
+
+ # DEVELOPER's NOTE:
+ # The script stripping code is probably not complete. Since a
+ # whitelist model is not being used -- because full HTML parsing
+ # would be required (and possible reliance on non-standard modules) --
+ # Future scripting extensions added to HTML could get by the filtering.
+ # The FAQ mentions the problems with HTML messages and recommends
+ # disabling HTML in archives.
+
sub filter {
my($fields, $data, $isdecode, $args) = @_;
***************
*** 146,153 ****
}
! ## Check comment declarations: may screw-up mhonarc processing
! ## and avoids someone sneaking in SSIs.
! #$$data =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # can crash perl
! $$data =~ s/<!--[^-]+[#X%\$\[]*/<!--/g; # Just mung them (faster)
## Get/remove title
--- 155,160 ----
}
! ## Unescape ascii letters to simplify strip code
! dehtmlize_ascii($data);
## Get/remove title
***************
*** 218,236 ****
$$data =~ s/(=\s*["']?\s*)(?:\&\{)+/$1/g;
! # Hopefully complete pattern to neutralize javascript:... URLs.
! # The pattern is ugly because we have to handle any combination
! # of regular chars and entity refs.
! $$data =~ s/\b(?:j|&\#(?:0*(?:74|106)|x0*(?:4a|6a))(?:;|(?![0-9])))
! (?:a|&\#(?:0*(?:65|97)|x0*(?:41|61))(?:;|(?![0-9])))
! (?:v|&\#(?:0*(?:86|118)|x0*(?:56|76))(?:;|(?![0-9])))
! (?:a|&\#(?:0*(?:65|97)|x0*(?:41|61))(?:;|(?![0-9])))
! (?:s|&\#(?:0*(?:83|115)|x0*(?:53|73))(?:;|(?![0-9])))
! (?:c|&\#(?:0*(?:67|99)|x0*(?:43|63))(?:;|(?![0-9])))
! (?:r|&\#(?:0*(?:82|114)|x0*(?:52|72))(?:;|(?![0-9])))
! (?:i|&\#(?:0*(?:73|105)|x0*(?:49|69))(?:;|(?![0-9])))
! (?:p|&\#(?:0*(?:80|112)|x0*(?:50|70))(?:;|(?![0-9])))
! (?:t|&\#(?:0*(?:84|116)|x0*(?:54|74))(?:;|(?![0-9])))
! /_javascript_/gix;
!
}
--- 225,236 ----
$$data =~ s/(=\s*["']?\s*)(?:\&\{)+/$1/g;
! # Neutralize javascript:... URLs: Unfortunately, browsers
! # are stupid enough to recognize a javascript URL with whitespace
! # in it (like tabs and newlines).
! $$data =~ s/\bj\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t/_javascript_/gi;
!
! # IE has a very unsecure expression() operator extension to
! # CSS, so we have to nuke it also.
! $$data =~ s/\bexpression\b/_expression_/gi;
}
***************
*** 342,345 ****
--- 342,350 ----
}
+ ## Check comment declarations: may screw-up mhonarc processing
+ ## and avoids someone sneaking in SSIs.
+ #$$data =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # can crash perl
+ $$data =~ s/<!--[^-]+[#X%\$\[]*/<!--/g; # Just mung them (faster)
+
($title.$$data, @files);
}
***************
*** 430,433 ****
--- 435,472 ----
push(@files, $filename); # @files defined in filter!!
$filename;
+ }
+
+ ##---------------------------------------------------------------------------
+
+ sub dehtmlize_ascii { # Decode numeric character references for a fixed set of ASCII codes
+ my $str = shift; # Text to process: a plain string or a reference to one
+ my $str_r = ref($str) ? $str : \$str; # A reference is edited in place; a plain string is a local copy
+
+ $$str_r =~ s{\&\#(\d+);?}{ # Pass 1: decimal form such as &#106; -- the trailing ';' is optional
+ my $n = int($1); # Numeric value of the digits (leading zeros do not matter)
+ if (($n >= 7 && $n <= 13) || # ASCII 7-13: BEL, BS, TAB, LF, VT, FF, CR
+ ($n == 32) || ($n == 61) || # space and '='
+ ($n >= 48 && $n <= 58) || # '0'-'9' and ':'
+ ($n >= 64 && $n <= 90) || # '@' and 'A'-'Z'
+ ($n >= 97 && $n <= 122)) { # 'a'-'z'
+ pack('C', $n); # Substitute the character the reference stands for
+ } else {
+ '&#'.$1.';' # Any other code stays a reference, original digits kept, ';' always present
+ }
+ }gex;
+ $$str_r =~ s{\&\#[xX]([0-9abcdefABCDEF]+);?}{ # Pass 2: hexadecimal form such as &#x6A; -- same rules
+ my $n = hex($1); # Numeric value of the hex digits
+ if (($n >= 7 && $n <= 13) || # Same code ranges as the decimal pass
+ ($n == 32) || ($n == 61) ||
+ ($n >= 48 && $n <= 58) ||
+ ($n >= 64 && $n <= 90) ||
+ ($n >= 97 && $n <= 122)) {
+ pack('C', $n); # Substitute the character the reference stands for
+ } else {
+ '&#x'.$1.';' # Kept as a reference; prefix is normalized to lowercase 'x', ';' always present
+ }
+ }gex;
+
+ $$str_r; # Return value: the processed text
}
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV