xsl-list
[Top] [All Lists]

[xsl] Collation in another language

2011-11-05 02:18:52
I am not sure if this question belongs here or not, but hope for an answer. I need a collator that puts ‘ch’ between ‘H’ and ‘I’. In another email from this list, Jirka Kosek gave me a link to some code he had written a very long time ago for docbook, but it is not at all clear to me how to use his code (located here:)
http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=3&ved=0CD0QFjAC&url=http%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fdownload%3Fdoi%3D10.1.1.131.2069%26rep%3Drep1%26type%3Dpdf&ei=-NOvTsGkJ5Gf-Qa7xpyqAg&usg=AFQjCNHnFYbsaML6F_MsAFKktr1DMZyR8g


I do not know Java, but I cobbled together a Czech language collator from notes I found on the web (if there is a Java Czech collator already available, I don’t know how to find it). I call the collator in my style sheet:

<xsl:variable name="sorting-collation" select="'http://saxon.sf.net/collation?class=CzechCollation'"/>
 <xsl:template match="List">
   <List>
     <xsl:for-each-group select="Word"  group-by="@word">
       <xsl:sort select="@word" collation="{$sorting-collation}"/>
......

My Java code is below. It does not put the ‘ch’ digraph in the right place:


/**
* CzechCollation.java
* Created on Nov 4, 2011
*/

import java.text.ParseException;
import java.text.RuleBasedCollator;

/**
 * Rule-based collator for traditional Czech alphabetical order.
 *
 * <p>The rules give separate primary positions to the hacek letters
 * (C, D, L, N, R, S, T, Z with hacek) directly after their base letters, and
 * place the digraph "ch" (in every letter-case combination) between H and I.
 * Acute, umlaut, and ring variants of vowels differ from their base letter
 * only at the tertiary level (the "," relation in the rule syntax).
 *
 * <p>The class has a public no-argument constructor so it can be instantiated
 * by name, e.g. from a collation URI that carries a {@code class=} parameter.
 */
public class CzechCollation extends RuleBasedCollator
{
    // Accented letters used in the rules, written as Unicode escapes so the
    // source file does not depend on the encoding it is compiled with.
    private static final String upperAcuteA = "\u00c1";
    private static final String lowerAcuteA = "\u00e1";
    private static final String upperUmlatA = "\u00c4";
    private static final String lowerUmlatA = "\u00e4";
    private static final String upperHacekC = "\u010c";
    private static final String lowerHacekC = "\u010d";
    private static final String upperHacekD = "\u010e";
    private static final String lowerHacekD = "\u010f";
    private static final String upperAcuteE = "\u00c9";
    private static final String lowerAcuteE = "\u00e9";
    private static final String upperHacekE = "\u011a";
    private static final String lowerHacekE = "\u011b";
    private static final String upperUmlatE = "\u00cb";
    private static final String lowerUmlatE = "\u00eb";
    private static final String upperAcuteI = "\u00cd";
    private static final String lowerAcuteI = "\u00ed";
    private static final String upperHacekL = "\u013d";
    private static final String lowerHacekL = "\u013e";
    private static final String upperHacekN = "\u0147";
    private static final String lowerHacekN = "\u0148";
    private static final String upperAcuteO = "\u00d3";
    private static final String lowerAcuteO = "\u00f3";
    private static final String upperUmlatO = "\u00d6";
    private static final String lowerUmlatO = "\u00f6";
    private static final String upperHacekR = "\u0158";
    private static final String lowerHacekR = "\u0159";
    private static final String upperHacekS = "\u0160";
    private static final String lowerHacekS = "\u0161";
    private static final String upperHacekT = "\u0164";
    private static final String lowerHacekT = "\u0165";
    private static final String upperAcuteU = "\u00da";
    private static final String lowerAcuteU = "\u00fa";
    private static final String upperUmlatU = "\u00dc";
    private static final String lowerUmlatU = "\u00fc";
    private static final String upperRingU = "\u016e";
    private static final String lowerRingU = "\u016f";
    private static final String upperAcuteY = "\u00dd";
    private static final String lowerAcuteY = "\u00fd";
    private static final String upperHacekZ = "\u017d";
    private static final String lowerHacekZ = "\u017e";

    /**
     * Collation rules in {@link RuleBasedCollator} syntax: "&lt;" starts a new
     * primary position, "," adds a tertiary variant of the preceding entry.
     * Punctuation and digits come first, then the letters in Czech order.
     */
    private static final String traditionalCzechRules =
        "< ','< '-' < ' ' <0<1<2<3<4<5<6<7<8<9"
        + " < A,a," + upperAcuteA + "," + lowerAcuteA + "," + upperUmlatA + "," + lowerUmlatA
        + "< B,b < C,c <" + upperHacekC + "," + lowerHacekC
        + "< D,d <" + upperHacekD + "," + lowerHacekD
        + "< E,e," + upperAcuteE + "," + lowerAcuteE + "," + upperHacekE + "," + lowerHacekE
        + "," + upperUmlatE + "," + lowerUmlatE
        // The digraph "ch" is a single collation unit sorted between H and I.
        + "< F,f < G,g < H,h < Ch,ch,cH,CH < I,i," + upperAcuteI + "," + lowerAcuteI
        + "< J,j < K,k < L,l <" + upperHacekL + "," + lowerHacekL
        + "< M,m < N,n <" + upperHacekN + "," + lowerHacekN
        // A "," is required between every variant; without the one after
        // lowerAcuteO the two letters would fuse into one two-character entry.
        + "< O,o," + upperAcuteO + "," + lowerAcuteO + "," + upperUmlatO + "," + lowerUmlatO
        + "< P,p < Q,q < R,r <" + upperHacekR + "," + lowerHacekR
        + "< S,s <" + upperHacekS + "," + lowerHacekS
        + "< T,t <" + upperHacekT + "," + lowerHacekT
        + "< U,u, " + upperAcuteU + "," + lowerAcuteU + "," + upperUmlatU + "," + lowerUmlatU
        + "," + upperRingU + "," + lowerRingU
        + "< V,v < W,w < X,x < Y,y," + upperAcuteY + "," + lowerAcuteY
        + "< Z,z <" + upperHacekZ + "," + lowerHacekZ;

    /**
     * Creates a collator that uses the traditional Czech rules.
     *
     * @throws ParseException if the rule string is rejected by
     *         {@link RuleBasedCollator}
     */
    public CzechCollation() throws ParseException
    {
        super(traditionalCzechRules);
    }

    /**
     * Command-line check: compares two words and prints the comparison result
     * (negative, zero, or positive) to standard error.
     *
     * @param args exactly two strings to compare
     * @throws Exception if the collator cannot be constructed
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length < 2)
        {
            System.err.println("Usage: java CzechCollation <word1> <word2>");
            System.exit(2);
        }
        System.err.println(new CzechCollation().compare(args[0], args[1]));
    }
}



--~------------------------------------------------------------------
XSL-List info and archive:  http://www.mulberrytech.com/xsl/xsl-list
To unsubscribe, go to: http://lists.mulberrytech.com/xsl-list/
or e-mail: <mailto:xsl-list-unsubscribe(_at_)lists(_dot_)mulberrytech(_dot_)com>
--~--