CVSROOT: /cvsroot/mhonarc
Module name: mhonarc
Branch:
Changes by: Earl Hood <earl(_at_)earlhood(_dot_)com> 03/10/17 18:08:45
Modified files:
MHonArc/lib : mhtxthtml.pl mhtxtplain.pl readmail.pl
Log message:
* [bug #5905]: Redid readmail::apply_base_url to be more robust and
to deal with invalid base URLs. Also make sure to not invoke splice
with beyond range offset.
mhtxthtml.pl changed to use readmail::apply_base_url instead of
its own version addbase() since both basically do the same thing.
The bug was actually discovered in m2h_text_html::addbase(), but
the readmail version had the same problem.
* Removed private $Url declarations in favor of readmail global
$UrlRxStr. $UrlRxStr reflects the latest list of registered URL
schemes (which may be overkill for mhonarc's needs).
Patches:
Index: mhonarc/MHonArc/lib/mhtxthtml.pl
diff -u mhonarc/MHonArc/lib/mhtxthtml.pl:2.35 mhonarc/MHonArc/lib/mhtxthtml.pl:2.36
--- mhonarc/MHonArc/lib/mhtxthtml.pl:2.35 Mon Sep 29 01:03:57 2003
+++ mhonarc/MHonArc/lib/mhtxthtml.pl Fri Oct 17 18:08:45 2003
@@ -1,6 +1,6 @@
##---------------------------------------------------------------------------##
## File:
-## $Id: mhtxthtml.pl,v 2.35 2003/09/29 05:03:57 ehood Exp $
+## $Id: mhtxthtml.pl,v 2.36 2003/10/17 22:08:45 ehood Exp $
## Author:
## Earl Hood mhonarc(_at_)mhonarc(_dot_)org
## Description:
@@ -32,9 +32,6 @@
package m2h_text_html;
-# Beginning of URL match expression
-my $Url = '(\w+://|\w+:)';
-
# Script related attributes: Basically any attribute that starts with "on"
my $SAttr = q/\bon\w+\b/;
@@ -238,7 +235,7 @@
## Modify relative urls to absolute using BASE
if ($base =~ /\S/) {
$$data =~ s/($UAttr\s*=\s*['"])([^'"]+)(['"])/
- join("", $1, &addbase($base,$2), $3)/geoix;
+ join("", $1, readmail::apply_base_url($base,$2), $3)/geoix;
}
## Check for frames: Do not support, so just show source
@@ -350,39 +347,6 @@
}
($title.$$data, @files);
-}
-
-##---------------------------------------------------------------------------
-
-sub addbase {
- my($b, $u) = @_;
- return $u if !defined($b) || $b !~ /\S/;
-
- my($ret);
- $u =~ s/^\s+//;
- if ($u =~ m%^$Url%o || $u =~ m/^#/) {
- ## Absolute URL or scroll link; do nothing
- $ret = $u;
- } else {
- ## Relative URL
- if ($u =~ /^\./) {
- ## "./---" or "../---": Need to remove and adjust base
- ## accordingly.
- $b =~ s/\/$//;
- my @a = split(/\//, $b);
- my $cnt = 0;
- while ( $cnt <= scalar(@a) &&
- $u =~ s|^(\.{1,2})/|| ) { ++$cnt if length($1) == 2; }
- splice(@a, -$cnt) if $cnt > 0;
- $b = join('/', @a, "");
-
- } elsif ($u =~ m%^/%) {
- ## "/---": Just use hostname:port of base.
- $b =~ s%^(${Url}[^/]*)/.*%$1%o;
- }
- $ret = $b . $u;
- }
- $ret;
}
##---------------------------------------------------------------------------
Index: mhonarc/MHonArc/lib/mhtxtplain.pl
diff -u mhonarc/MHonArc/lib/mhtxtplain.pl:2.42 mhonarc/MHonArc/lib/mhtxtplain.pl:2.43
--- mhonarc/MHonArc/lib/mhtxtplain.pl:2.42 Tue Sep 30 16:53:59 2003
+++ mhonarc/MHonArc/lib/mhtxtplain.pl Fri Oct 17 18:08:45 2003
@@ -1,6 +1,6 @@
##---------------------------------------------------------------------------##
## File:
-## $Id: mhtxtplain.pl,v 2.42 2003/09/30 20:53:59 ehood Exp $
+## $Id: mhtxtplain.pl,v 2.43 2003/10/17 22:08:45 ehood Exp $
## Author:
## Earl Hood mhonarc(_at_)mhonarc(_dot_)org
## Description:
@@ -39,11 +39,11 @@
sub Q_FANCY() { 2; }
sub Q_FLOWED() { 3; }
-$Url = '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://' .
- '|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)';
-$UrlExp = $Url . q/[^\s\(\)\|<>"'\0-\037]+/ .
+$UrlExp = $readmail::UrlRxStr .
+ q/[^\s\(\)\|<>"'\0-\037]+/ .
q/[^\.?!;,"'\|\[\]\(\)\s<>\0-\037]/;
-$HUrlExp = $Url . q/(?:&(?![gl]t;)|[^\s\(\)\|<>"'\&\0-\037])+/ .
+$HUrlExp = $readmail::UrlRxStr .
+ q/(?:&(?![gl]t;)|[^\s\(\)\|<>"'\&\0-\037])+/ .
q/[^\.?!;,"'\|\[\]\(\)\s<>\&\0-\037]/;
$QuoteChars = '[>]';
$HQuoteChars = '>';
Index: mhonarc/MHonArc/lib/readmail.pl
diff -u mhonarc/MHonArc/lib/readmail.pl:2.33 mhonarc/MHonArc/lib/readmail.pl:2.34
--- mhonarc/MHonArc/lib/readmail.pl:2.33 Sat Aug 2 02:04:47 2003
+++ mhonarc/MHonArc/lib/readmail.pl Fri Oct 17 18:08:45 2003
@@ -1,8 +1,8 @@
##---------------------------------------------------------------------------##
## File:
-## $Id: readmail.pl,v 2.33 2003/08/02 06:04:47 ehood Exp $
+## $Id: readmail.pl,v 2.34 2003/10/17 22:08:45 ehood Exp $
## Author:
-## Earl Hood mhonarc(_at_)mhonarc(_dot_)org
+## Earl Hood mhonarc AT mhonarc DOT org
## Description:
## Library defining routines to parse MIME e-mail messages. The
## library is designed so it may be reused for other e-mail
@@ -24,7 +24,7 @@
## $parm_hash_ref = MAILparse_parameter_str($header_field, 1);
##
##---------------------------------------------------------------------------##
-## Copyright (C) 1996-2002 Earl Hood, mhonarc(_at_)mhonarc(_dot_)org
+## Copyright (C) 1996-2003 Earl Hood, mhonarc AT mhonarc DOT org
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -48,8 +48,7 @@
## Private Globals ##
###############################################################################
-my $Url = '(\w+://|\w+:)';
-
+#my $Url = '(\w+://|\w+:)';
my @_MIMEAltPrefs = ();
my %_MIMEAltPrefs = ();
@@ -61,6 +60,14 @@
## Constants
##
+## String for matching the start of a URL
+$UrlRxStr =
+ '(?:(?:https?|ftp|afs|wais|telnet|ldap|gopher|z39\.50[rs]|vemmi|imap|'.
+ 'nfs|acap|rtspu?|tip|pop|sip|(?:soap|xmlrpc)\.beeps?|go|ipp|'.
+ 'tftp)://|'.
+ 'news:(?://)?|'.
+ '(?:nntp|mid|cid|mailto|prospero|data|service|tel|fax|modem|h\.323):)';
+
## Constants for use as second argument to MAILdecode_1522_str().
sub JUST_DECODE() { 1; }
sub DECODE_ALL() { 2; }
@@ -1250,37 +1257,76 @@
}
##---------------------------------------------------------------------------##
-
+## apply_base_url(): Convert a relative URL to a full URL with
+## specific base;
+##
sub apply_base_url {
- my($b, $u) = @_;
+ my $b = shift; # Base URL
+ my $u = shift; # URL to apply base to
+
+ ## If no base, nothing to do
return $u if !defined($b) || $b !~ /\S/;
- my($ret);
+ ## If absolute URL or scroll link; do nothing
$u =~ s/^\s+//;
- if ($u =~ m%^$Url%o || $u =~ m/^#/) {
- ## Absolute URL or scroll link; do nothing
- $ret = $u;
+ if ($u =~ /^$UrlRxStr/o || $u =~ m/^#/) {
+ return $u;
+ }
+
+ ## Check if base URL allows relative resolution
+ my($host_part, $scheme);
+ if ($b =~ s{^((https?|ftp|file|nfs|acap|tftp)://[\w\-:\d(_dot_)\(_at_)\%=~&]+)/?}{}) {
+ $host_part = $1;
+ $scheme = lc $2;
} else {
- ## Relative URL
- if ($u =~ /^\./) {
- ## "./---" or "../---": Need to remove and adjust base
- ## accordingly.
- $b =~ s/\/$//;
- my @a = split(/\//, $b);
- my $cnt = 0;
- while ( $cnt <= scalar(@a) &&
- $u =~ s|^(\.{1,2})/|| ) { ++$cnt if length($1) == 2; }
- splice(@a, -$cnt) if $cnt > 0;
- $b = join('/', @a, "");
-
- } elsif ($u =~ m%^/%) {
- ## "/---": Just use hostname:port of base.
- $b =~ s%^(${Url}[^/]*)/.*%$1%o;
+ warn qq/Warning: Invalid base url, "$b" to apply to "$u"\n/;
+ return $u;
+ }
+
+ ## If "/---", just use hostname:port of base.
+ if ($u =~ /^\//) {
+ return $host_part . $u;
+ }
+
+ ## Clean up base URL
+ SCHEME: {
+ if ($scheme eq 'http' || $scheme eq 'https' || $scheme eq 'acap') {
+ $b =~ s/\?.*$//;
+ last SCHEME;
+ }
+ if ($scheme eq 'ftp') {
+ $b =~ s/;type=.$//;
+ last SCHEME;
+ }
+ if ($scheme eq 'tftp') {
+ $b =~ s/;mode=\w+$//;
+ last SCHEME;
}
- $ret = $b . $u;
}
- $ret;
+ $b =~ s/\/$//; # strip any trailing '/' (we add it back later)
+
+ ## "./---" or "../---": Need to remove and adjust base accordingly.
+ my $cnt = 0;
+ while ( $u =~ s|^(\.{1,2})/|| ) { ++$cnt if length($1) == 2; }
+
+ if ($b eq '') {
+ # base is just host
+ return join('/', $host_part, $u);
+ }
+ if ($cnt > 0) {
+ # trim path
+ my @a = split(/\//, $b);
+ if ($cnt <= scalar(@a)) {
+ splice(@a, -$cnt);
+ return join('/', $host_part, @a, $u);
+ }
+ # invalid relative path, tries to go past root
+ return join('/', $host_part, $u);
+
+ }
+ return join('/', $host_part, $b, $u);
}
+
##---------------------------------------------------------------------------##
sub extract_charset {
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-COMMITS