The following stylesheet takes the XML output of the Word 2003 beta and turns
it into what is, IMO, a nicer form. The formatting information is removed.
I am looking at the possibility of using styles mapped to my DTD so people
can create in Word but then we can work on the text in XML after initial
creation.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:w="http://schemas.microsoft.com/office/word/2003/2/wordml">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<!-- Entry point for the Word document element: emits the xmlDocument
     wrapper and processes only the w:body child, so any other children
     of w:wordDocument are not visited. -->
<xsl:template match="w:wordDocument">
  <xsl:element name="xmlDocument">
    <xsl:apply-templates select="child::w:body"/>
  </xsl:element>
</xsl:template>
<!-- w:body carries the document text. It produces no element of its
     own; its children are simply handed on to the other templates. -->
<xsl:template match="w:body">
  <xsl:apply-templates/>
</xsl:template>
<!-- Paragraph: wraps the paragraph content in an element named after
     the paragraph style (the w:val of the first descendant w:pStyle). -->
<xsl:template match="w:p">
  <xsl:variable name="paragraphStyle">
    <xsl:choose>
      <!-- string() takes the first w:pStyle/@w:val in document order,
           which is the value the attribute value template below uses. -->
      <xsl:when test="string(descendant::w:pStyle/@w:val) != ''">
        <xsl:value-of select="descendant::w:pStyle/@w:val"/>
      </xsl:when>
      <!-- xsl:element needs a non-empty QName. A paragraph without a
           w:pStyle (or with an empty w:val) would otherwise produce an
           empty element name, which is an error in XSLT 1.0.
           NOTE(review): "Normal" is an assumed fallback name for
           unstyled paragraphs; confirm it against the target DTD. -->
      <xsl:otherwise>Normal</xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <xsl:element name="{$paragraphStyle}">
    <xsl:apply-templates/>
  </xsl:element>
</xsl:template>
<!-- Run inside a paragraph that carries a character style.
     The predicate restricts this template to runs that have a
     w:rStyle/@w:val; the run content is wrapped in an element named
     after that value. Runs without w:rStyle are not matched here. -->
<xsl:template match="w:r[descendant::w:rStyle/@w:val]">
  <xsl:element name="{descendant::w:rStyle/@w:val}">
    <xsl:apply-templates/>
  </xsl:element>
</xsl:template>
<!-- Text container: adds no markup of its own and just processes its
     child nodes, so the text reaches the output through the built-in
     text rule. -->
<xsl:template match="w:t">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- Line break: each w:br becomes an empty lineBreak element.
     The w prefix is excluded so that the literal result element does
     not copy the WordML namespace declaration into the output. -->
<xsl:template match="w:br">
  <lineBreak xsl:exclude-result-prefixes="w"/>
</xsl:template>
</xsl:stylesheet>
Jim Albright
704 843-0582
Wycliffe Bible Translators
XSL-List info and archive: http://www.mulberrytech.com/xsl/xsl-list