mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/lib/MHonArc UTF8.pm,1.1,1.2

2002-07-26 22:13:25
Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib/MHonArc
In directory subversions:/tmp/cvs-serv18499/lib/MHonArc

Modified Files:
	UTF8.pm 
Log Message:
* Added TEXTCLIPFUNC resource: Defines the text clipping function
  that should be used by MHonArc.  This function is mainly used
  in resource variable expansion where clipping has been specified,
  for example, "$SUBJECT:72$".

* Added clip() function in MHonArc::UTF8 that can be registered via
  TEXTCLIPFUNC resource to handle clipping of UTF-8 text.

* Example utf-8.mrc updated to include some corrections and to
  define TEXTCLIPFUNC resource.

* Improved navigation links to resource reference pages which should
  help their usability.


Index: UTF8.pm
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/MHonArc/UTF8.pm,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** UTF8.pm	20 Jul 2002 00:48:48 -0000	1.1
--- UTF8.pm	27 Jul 2002 05:13:14 -0000	1.2
***************
*** 30,33 ****
--- 30,34 ----
  
  use strict;
+ use Unicode::String;
  use Unicode::MapUTF8 qw(
      to_utf8 utf8_charset_alias utf8_supported_charset
***************
*** 75,78 ****
--- 76,141 ----
      }
      $str;
+ }
+ 
+ sub clip {
+     use utf8;
+     my $str      = \shift;  # Prevent unnecessary copy.
+     my $len      = shift;   # Clip length
+     my $is_html  = shift;   # If entity references should be considered
+     my $has_tags = shift;   # If html tags should be stripped
+ 
+     my $u = Unicode::String::utf8($$str);
+ 
+     if (!$is_html) {
+       return $u->substr(0, $len);
+     }
+ 
+     my $text = Unicode::String::utf8("");
+     my $subtext;
+     my $html_len = $u->length;
+     my($pos, $sublen, $erlen, $real_len);
+     my $er_len = 0;
+     
+     for ( $pos=0, $sublen=$len; $pos < $html_len; ) {
+ 	$subtext = $u->substr($pos, $sublen);
+ 	$pos += $sublen;
+ 
+ 	# strip tags
+ 	if ($has_tags) {
+ 	    $subtext =~ s/\A[^<]*>//; # clipped tag
+ 	    $subtext =~ s/<[^>]*>//g;
+ 	    $subtext =~ s/<[^>]*\Z//; # clipped tag
+ 	}
+ 
+ 	# check for clipped entity reference
+ 	if (($pos < $html_len) && ($subtext =~ /\&[^;]*\Z/)) {
+ 	    my $semi = $u->index(';', $pos);
+ 	    if ($semi < 0) {
+ 		# malformed entity reference
+ 		$subtext .= $u->substr($pos);
+ 		$pos = $html_len;
+ 	    } else {
+ 		$subtext .= $u->substr($pos, $semi-$pos+1)
+ 		    if $semi > $pos;
+ 		$pos = $semi+1;
+ 	    }
+ 	}
+ 
+ 	# compute entity reference lengths to determine "real" character
+ 	# count and not raw character count.
+ 	while ($subtext =~ /(\&[^;]+);/g) {
+ 	    $er_len += length($1);
+ 	}
+ 
+ 	$text .= $subtext;
+ 
+ 	# done if we have enough
+ 	$real_len = $text->length - $er_len;
+ 	if ($real_len >= $len) {
+ 	    last;
+ 	}
+ 	$sublen = $len - ($text->length - $er_len);
+     }
+     $text;
  }
  

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV