Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib/MHonArc
In directory subversions:/tmp/cvs-serv18499/lib/MHonArc
Modified Files:
UTF8.pm
Log Message:
* Added TEXTCLIPFUNC resource: Defines the text clipping function
that should be used by MHonArc. This function is mainly used
in resource variable expansion where clipping has been specified,
for example, "$SUBJECT:72$".
* Added clip() function in MHonArc::UTF8 that can be registered via
TEXTCLIPFUNC resource to handle clipping of UTF-8 text.
* Example utf-8.mrc updated to include some corrections and to
define TEXTCLIPFUNC resource.
* Improved navigation links to resource reference pages which should
help their usability.
Index: UTF8.pm
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/MHonArc/UTF8.pm,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** UTF8.pm 20 Jul 2002 00:48:48 -0000 1.1
--- UTF8.pm 27 Jul 2002 05:13:14 -0000 1.2
***************
*** 30,33 ****
--- 30,34 ----
use strict;
+ use Unicode::String;
use Unicode::MapUTF8 qw(
to_utf8 utf8_charset_alias utf8_supported_charset
***************
*** 75,78 ****
--- 76,141 ----
}
$str;
+ }
+
+ ##  clip(): Clip UTF-8 text to (about) $len characters.
+ ##
+ ##  Arguments:
+ ##    $str      - UTF-8 encoded text to clip.
+ ##    $len      - Maximum number of "real" characters wanted.
+ ##    $is_html  - If true, entity references (e.g. "&amp;") are each
+ ##                counted as a single character and are never cut
+ ##                in the middle.
+ ##    $has_tags - If true (and $is_html is true), markup tags are
+ ##                removed from the result.
+ ##
+ ##  Returns the clipped text as built through Unicode::String.
+ ##
+ ##  Notes:
+ ##    * Anything of the form "&...;" is treated as an entity
+ ##      reference; no validation of the entity name is done.
+ ##    * An entity reference or tag that is never terminated is
+ ##      considered malformed: the unterminated entity text is kept
+ ##      through the end of the input, an unterminated tag is dropped
+ ##      through the end of the input.
+ sub clip {
+     use utf8;
+     my $str      = \shift;  # Prevent unnecessary copy.
+     my $len      = shift;   # Clip length
+     my $is_html  = shift;   # If entity references should be considered
+     my $has_tags = shift;   # If html tags should be stripped
+
+     my $u = Unicode::String::utf8($$str);
+
+     # Plain text: a straight character-based substring is all we need.
+     if (!$is_html) {
+         return $u->substr(0, $len);
+     }
+
+     my $text = Unicode::String::utf8("");
+     my $subtext;
+     my $html_len = $u->length;
+     my($pos, $sublen, $real_len);
+     my $er_len = 0;     # Characters of entity reference overhead in $text
+
+     # Pull text in chunks: each pass grabs as many raw characters as
+     # are still missing, then discounts markup overhead and loops
+     # again if the "real" character count is still short.
+     for ( $pos=0, $sublen=$len; $pos < $html_len; ) {
+         $subtext = $u->substr($pos, $sublen);
+         $pos += $sublen;
+
+         # strip tags
+         if ($has_tags) {
+             $subtext =~ s/<[^>]*>//g;
+             # A tag clipped at the end of the chunk: drop the partial
+             # tag and skip the source position past its closing '>'.
+             # This is done only when a tag was really clipped, so a
+             # literal '>' in regular text is left untouched.
+             if ($subtext =~ s/<[^>]*\Z//) {
+                 if ($pos < $html_len) {
+                     my $gt = $u->index('>', $pos);
+                     $pos = ($gt < 0) ? $html_len : $gt+1;
+                 }
+             }
+         }
+
+         # check for clipped entity reference
+         if (($pos < $html_len) && ($subtext =~ /\&[^;]*\Z/)) {
+             my $semi = $u->index(';', $pos);
+             if ($semi < 0) {
+                 # malformed entity reference
+                 $subtext .= $u->substr($pos);
+                 $pos = $html_len;
+             } else {
+                 # The search starts at $pos, so $semi >= $pos here.
+                 # Always append through the ';' (even when it is the
+                 # very next character), otherwise the terminator of
+                 # the entity reference would be lost.
+                 $subtext .= $u->substr($pos, $semi-$pos+1);
+                 $pos = $semi+1;
+             }
+         }
+
+         # compute entity reference lengths to determine "real" character
+         # count and not raw character count.  $1 excludes the ';', so
+         # each entity reference ends up counting as one character.
+         while ($subtext =~ /(\&[^;]+);/g) {
+             $er_len += length($1);
+         }
+
+         $text .= $subtext;
+
+         # done if we have enough
+         $real_len = $text->length - $er_len;
+         if ($real_len >= $len) {
+             last;
+         }
+         $sublen = $len - $real_len;
+     }
+     $text;
}
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV