xsl-list
[Top] [All Lists]

[xsl] Recognising Unicode in a CSV to XML transform

2008-05-25 19:59:27
Hi,

I have been trying to solve this problem and have hit a number of brick
walls. I'm using the following stylesheet to transform a CSV file to XML
courtesy of Andrew Welch.

My problem is when I feed the transform a CSV with any characters that sit
outside the Basic Latin set, I get the "Cannot locate :" messege found in
the <xsl:otherwise> statement.

Is it possible to somehow integrate the following into the regex of the
initial analyze string so these characters do not break the transform?

\p{InLatin-1_Supplement} (U+0080..U+00FF)
\p{InGeneral_Punctuation} (U+2000..U+206F)

I am really clutching at straws here!

Thanks
Marney

<?xml version="1.0"?>
<!--
    A CSV to XML transform
    Version 2
    Andrew Welch
    http://andrewjwelch.com
    
    Modify or supply the $pathToCSV parameter and run the transform
    using "main" as the initial template.
    
    For bug reports or modification requests contact me at
andrew(_dot_)j(_dot_)welch(_at_)gmail(_dot_)com
-->

<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform";
    xmlns:xs="http://www.w3.org/2001/XMLSchema";
  xmlns:msxsl="urn:schemas-microsoft-com:xslt"
    xmlns:fn="fn"
    exclude-result-prefixes="xs fn msxsl">

  <xsl:output indent="yes" encoding="UTF-8"/>

  <xsl:param name="pathToCSV" select="'file:///c:/Documents and
Settings/webcopy.csv'"/>

  <xsl:function name="fn:getTokens" as="xs:string+">
    <xsl:param name="str" as="xs:string"/>
    <xsl:analyze-string select="concat($str, ',')"
regex='(("[^"]*")+|[^,]*),'>
      <xsl:matching-substring>
        <xsl:sequence select='replace(regex-group(1), "^""|""$|("")""",
"$1")'/>
      </xsl:matching-substring>
    </xsl:analyze-string>
  </xsl:function>

  <xsl:template match="/" name="main">
      <xsl:choose>
        <xsl:when test="unparsed-text-available($pathToCSV)">
          <xsl:variable name="csv" select="unparsed-text($pathToCSV)"/>
          <xsl:variable name="lines" select="tokenize($csv, '&#xD;&#xA;')"
as="xs:string+"/>
          <xsl:variable name="elemNames" select="fn:getTokens($lines[1])"
as="xs:string+"/>
          <root>
            <xsl:for-each select="$lines[position() > 1]">
              <row>
                <xsl:variable name="lineItems" select="fn:getTokens(.)"
as="xs:string+"/>
                <xsl:for-each select="$elemNames">
                  <xsl:variable name="pos" select="position()"/>
                  <xsl:element name="{.}">
                    <xsl:value-of select="$lineItems[$pos]"/>
                  </xsl:element>
                </xsl:for-each>
              </row>
            </xsl:for-each>
          </root>
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>Cannot locate : </xsl:text>
          <xsl:value-of select="$pathToCSV"/>
        </xsl:otherwise>
      </xsl:choose>
  </xsl:template>

</xsl:stylesheet>
Marney Cotterill
graphic designer
                   
cracker//brandware

6 Bourke Street
Queens Park 
NSW 2022
Telephone 02 9387 2001
Facsimile 02 9387 2006
marney(_at_)crackerbrandware(_dot_)com
www.crackerbrandware.com



--~------------------------------------------------------------------
XSL-List info and archive:  http://www.mulberrytech.com/xsl/xsl-list
To unsubscribe, go to: http://lists.mulberrytech.com/xsl-list/
or e-mail: <mailto:xsl-list-unsubscribe(_at_)lists(_dot_)mulberrytech(_dot_)com>
--~--