xsl-list
[Top] [All Lists]

Re: Linenumbering & word index

2004-08-06 07:26:59


But I don't see how I a) tokenize only the output of l/text() and
head/text() (it complains of multiple inputs when I do so), and 
b) how I get line-number and poem-number based on position()?

You can't do
tokenize(l/text(), '\s+')
because tokenize() wants a single string as its first argument, and
l/text() probably selects more than one node.

You can do

 select="for $l in l return tokenize($l,'\s+')"
or do the same with xsl:for-each and tokenize the lines one at a time.

However, you really want to build yourself a temporary tree first, something like:

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
<!--
  Word index for poems (XSLT 2.0).

  Pass 1 (mode "a") rebuilds each selected poem as a temporary tree
      <div poem="P"><l n="head|N"><word>...</word>...</l>...</div>
  so that every word carries its poem number and line number as
  ancestor attributes.
  Pass 2 groups the <word> elements of that tree by their string value
  and prints, for each distinct word, every "poem:line" where it occurs.
-->
<xsl:output indent="yes"/>

<!--
  Entry point. The temporary tree is also copied to the output between
  literal "[" and "]" so that it can be inspected next to the index.
  Note: the literal text around xsl:copy-of is part of the output; do not
  add comments or whitespace next to it.
-->
<xsl:template match="/">
<!-- Pass 1: build the word tree for every poem div. -->
<xsl:variable name="x">
<xsl:apply-templates mode="a" select="div[@type='poem']"/>
</xsl:variable>
[
<xsl:copy-of  select="$x"/>
]
<xsl:for-each-group select="$x/div/l/word" group-by=".">
 <xsl:sort />
  <!-- One output line per distinct word: the word, then its locations. -->
  <xsl:text>&#10;</xsl:text>
  <xsl:value-of select="."/>
  <!-- key() searches the document containing the context node, i.e. the
       temporary tree, and returns every occurrence of the current word. -->
  <xsl:for-each select="key('w',.)">
  <xsl:text> </xsl:text>
  <xsl:value-of select="../../@poem"/>:<xsl:value-of select="../@n"/>
  </xsl:for-each>
</xsl:for-each-group>
</xsl:template>

<!--
  One poem. position() is the poem's rank among the divs selected by the
  caller. Lines are selected as a single lg/l sequence, so line numbers
  run continuously across all line groups of the poem.
-->
<xsl:template mode="a" match="div">
<div poem="{position()}">
<xsl:apply-templates mode="a" select="head"/>
<xsl:apply-templates mode="a" select="lg/l"/>
</div>
</xsl:template>

<!--
  Poem heading: recorded as line "head". Words are split on whitespace and
  on the punctuation , . ! and are lower-cased.
-->
<xsl:template mode="a" match="head">
<l n="head">
<!-- tokenize() yields a zero-length string when the text starts or ends
     with a separator (e.g. a heading ending in "!"); drop those so no
     empty <word/> reaches the index. -->
<xsl:for-each select="tokenize(.,'(\s|[,\.!])+')[. ne '']">
<word><xsl:value-of select="lower-case(.)"/></word>
</xsl:for-each>
</l>
</xsl:template>

<!--
  Verse line: numbered by its position in the selected lg/l sequence.
  Words are split on whitespace only and keep their original case.
-->
<xsl:template mode="a" match="l">
<l n="{position()}">
<!-- Leading or trailing whitespace would otherwise produce an empty token. -->
<xsl:for-each select="tokenize(.,'\s+')[. ne '']">
<word><xsl:value-of select="."/></word>
</xsl:for-each>
</l>
</xsl:template>

<!-- Index of <word> elements by string value; used by key('w', ...) above. -->
<xsl:key name="w" match="word" use="."/>

</xsl:stylesheet>


$ saxon8 poem.xml poem.xsl
<?xml version="1.0" encoding="UTF-8"?>
[

<div poem="1">
   <l n="head">
      <word>headers</word>
      <word>should</word>
      <word>be</word>
      <word>included</word>
      <word>in</word>
      <word>word</word>
      <word>index</word>
   </l>
   <l n="1">
      <word>This</word>
      <word>is</word>
      <word>a</word>
      <word>line</word>
      <word>that</word>
      <word>really</word>
      <word>should</word>
      <word>be</word>
      <word>included</word>
   </l>
   <l n="2">
      <word>This</word>
      <word>is</word>
      <word>a</word>
      <word>line</word>
      <word>that</word>
      <word>should</word>
      <word>be</word>
      <word>included</word>
   </l>
   <l n="3">
      <word>This</word>
      <word>is</word>
      <word>a</word>
      <word>line</word>
      <word>that</word>
      <word>really</word>
      <word>should</word>
      <word>be</word>
      <word>included</word>
   </l>
   <l n="4">
      <word>This</word>
      <word>is</word>
      <word>a</word>
      <word>line</word>
      <word>that</word>
      <word>should</word>
      <word>be</word>
      <word>included</word>
   </l>
</div>
]

a 1:1 1:2 1:3 1:4
be 1:head 1:1 1:2 1:3 1:4
headers 1:head
in 1:head
included 1:head 1:1 1:2 1:3 1:4
index 1:head
is 1:1 1:2 1:3 1:4
line 1:1 1:2 1:3 1:4
really 1:1 1:3
should 1:head 1:1 1:2 1:3 1:4
that 1:1 1:2 1:3 1:4
This 1:1 1:2 1:3 1:4
word 1:head

________________________________________________________________________
This e-mail has been scanned for all viruses by Star Internet. The
service is powered by MessageLabs. For more information on a proactive
anti-virus service working around the clock, around the globe, visit:
http://www.star.net.uk
________________________________________________________________________


<Prev in Thread] Current Thread [Next in Thread>