Hi Marina,
"marina" <marina777uk(_at_)yahoo(_dot_)com> wrote...
1) Find out how many messages overall were sent to 1,
2, 3, etc. people.
As a duplicated message will always follow the
original, i.e. be the next <MESSAGE> tag of the
following sibling node, I'm thinking that the
stylesheet would start with the first message and keep comparing
siblings until it found one that was different. Then it would just add
the previous number of sibling nodes? ( I probably need to use keys?)
I think you will need to do that as a first task - i.e. restructure the
XML so that the duplicated messages become one message. That task, in
isolation, might look something like...
== XSL1 ===========================================
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Collapses each run of consecutive, identical SENT children of LOG into a
  single msg element. Two SENT elements are treated as identical when their
  USER, LOCATION and MESSAGE string values are all equal.

  Output per run:
    msg-text      the MESSAGE text after normalize-space()
    target-count  the number of SENT elements in the run
    USER/LOCATION copied from the first SENT of the run, for identification
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="LOG">
    <output>
      <!-- A SENT starts a new run when it has no previous SENT sibling, or
           when its key differs from the key of the SENT immediately before
           it. Line breaks inside the select are placed only between XPath
           tokens, never inside a name. -->
      <xsl:for-each select="SENT[not(preceding-sibling::SENT)
                                 or concat(USER,'|',LOCATION,'|',MESSAGE) !=
                                    concat(preceding-sibling::SENT[1]/USER,'|',
                                           preceding-sibling::SENT[1]/LOCATION,'|',
                                           preceding-sibling::SENT[1]/MESSAGE)]">

        <!-- key of the run that starts at the current SENT -->
        <xsl:variable name="this-key"
                      select="concat(USER,'|',LOCATION,'|',MESSAGE)"/>

        <!-- id of the first later SENT with a different key, i.e. the start
             of the next run; the empty string when this is the last run -->
        <xsl:variable name="next-msg-id"
                      select="generate-id(following-sibling::SENT[
                                concat(USER,'|',LOCATION,'|',MESSAGE) != $this-key][1])"/>

        <msg>
          <msg-text>
            <!-- remove leading/trailing whitespace of the message text -->
            <xsl:value-of select="normalize-space(MESSAGE)"/>
          </msg-text>

          <!-- Count of targets of this distinct message: the current SENT
               plus every later SENT that sees the same "next run start" as
               the current one does. SENT elements belonging to later runs
               see a different (or no) next start and are excluded. -->
          <target-count>
            <xsl:value-of select="count(. |
                                    following-sibling::SENT[
                                      generate-id(following-sibling::SENT[
                                        concat(USER,'|',LOCATION,'|',MESSAGE) != $this-key][1])
                                      = $next-msg-id])"/>
          </target-count>

          <!-- copy some of the info, just so that you can see which message
               is which -->
          <xsl:copy-of select="USER | LOCATION"/>
        </msg>
      </xsl:for-each>
    </output>
  </xsl:template>

</xsl:stylesheet>
== end of XSL1 ====================================
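Then you could use that restructured XML to create the final table, all
in one stylesheet, by building it as a result tree fragment (RTF) and
converting it to a node-set with the EXSLT node-set() function, e.g.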
== XSL2 ===========================================
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Builds an HTML table with one row per distinct group size.

  Step 1: runs of consecutive identical LOG/SENT elements (same USER,
          LOCATION and MESSAGE) are collapsed into msg elements held in a
          result tree fragment, each with its msg-text and target-count.
  Step 2: the fragment is converted to a node-set with exslt:node-set() and
          grouped by target-count using a key (Muenchian grouping).
  Step 3: for each distinct target-count a row is written with the group
          size, the number of messages of that size, and their average
          word count.

  Requires a processor that supports the EXSLT common node-set() function.
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:exslt="http://exslt.org/common"
                exclude-result-prefixes="exslt">

  <xsl:output method="html" indent="yes"/>

  <!-- key for finding distinct target counts -->
  <xsl:key name="kDistinctTargetCount" match="msg" use="target-count"/>

  <!-- re-build XML so that distinct messages become one message -->
  <xsl:variable name="rtf-distinct-msgs">
    <!-- A SENT starts a new run when it has no previous SENT sibling, or
         when its key differs from the key of the SENT immediately before
         it. Line breaks inside the select are placed only between XPath
         tokens, never inside a name. -->
    <xsl:for-each select="LOG/SENT[not(preceding-sibling::SENT)
                                   or concat(USER,'|',LOCATION,'|',MESSAGE) !=
                                      concat(preceding-sibling::SENT[1]/USER,'|',
                                             preceding-sibling::SENT[1]/LOCATION,'|',
                                             preceding-sibling::SENT[1]/MESSAGE)]">

      <!-- key of the run that starts at the current SENT -->
      <xsl:variable name="this-key"
                    select="concat(USER,'|',LOCATION,'|',MESSAGE)"/>

      <!-- id of the first later SENT with a different key, i.e. the start
           of the next run; the empty string when this is the last run -->
      <xsl:variable name="next-msg-id"
                    select="generate-id(following-sibling::SENT[
                              concat(USER,'|',LOCATION,'|',MESSAGE) != $this-key][1])"/>

      <msg>
        <msg-text>
          <!-- remove leading/trailing whitespace of the message text -->
          <xsl:value-of select="normalize-space(MESSAGE)"/>
        </msg-text>

        <!-- Count of targets of this distinct message: the current SENT
             plus every later SENT that sees the same "next run start" as
             the current one does. -->
        <target-count>
          <xsl:value-of select="count(. |
                                  following-sibling::SENT[
                                    generate-id(following-sibling::SENT[
                                      concat(USER,'|',LOCATION,'|',MESSAGE) != $this-key][1])
                                    = $next-msg-id])"/>
        </target-count>
      </msg>
    </xsl:for-each>
  </xsl:variable>

  <!-- convert RTF to node-set -->
  <xsl:variable name="distinct-msgs"
                select="exslt:node-set($rtf-distinct-msgs)/msg"/>

  <xsl:template match="/">
    <html>
      <body>
        <table border="1">
          <tr>
            <th>Group Size</th>
            <th>Number of Messages</th>
            <th>Av Number Words</th>
          </tr>
          <!-- Process the rebuilt messages by their distinct target counts:
               only the first msg returned by the key for each target-count
               value is selected, so each group size yields one row. -->
          <xsl:apply-templates
              select="$distinct-msgs[generate-id() =
                        generate-id(key('kDistinctTargetCount',target-count))]">
            <xsl:sort select="target-count" data-type="number"/>
          </xsl:apply-templates>
        </table>
      </body>
    </html>
  </xsl:template>

  <!-- Writes one table row for the group size of the matched msg. -->
  <xsl:template match="msg">
    <!-- get the messages that have this same target count -->
    <xsl:variable name="also-msgs"
                  select="key('kDistinctTargetCount',target-count)"/>
    <!-- count them -->
    <xsl:variable name="count-also-msgs" select="count($also-msgs)"/>

    <!-- get word counts, one word-count element per message in the group -->
    <xsl:variable name="rtf-word-counts">
      <xsl:for-each select="$also-msgs">
        <word-count>
          <xsl:choose>
            <!-- msg-text is already space-normalised, so words are separated
                 by exactly one space: words = number of spaces + 1 -->
            <xsl:when test="string(msg-text)">
              <xsl:value-of select="string-length(msg-text)
                                    - string-length(translate(msg-text,' ',''))
                                    + 1"/>
            </xsl:when>
            <!-- an empty message has no words; without this branch it would
                 be counted as one word -->
            <xsl:otherwise>0</xsl:otherwise>
          </xsl:choose>
        </word-count>
      </xsl:for-each>
    </xsl:variable>

    <tr>
      <td>
        <xsl:value-of select="target-count"/>
      </td>
      <td>
        <xsl:value-of select="$count-also-msgs"/>
      </td>
      <td>
        <xsl:value-of
            select="sum(exslt:node-set($rtf-word-counts)/word-count)
                    div $count-also-msgs"/>
      </td>
    </tr>
  </xsl:template>

</xsl:stylesheet>
== end of XSL2 ====================================
Hope this helps
Marrow
http://www.marrowsoft.com - home of Xselerator (XSLT IDE and debugger)
http://www.topxml.com/Xselerator
XSL-List info and archive: http://www.mulberrytech.com/xsl/xsl-list