xsl-list
[Top] [All Lists]

Re: [xsl] Sorting substitution instructions by max. length of matches

2007-10-05 08:05:33

I'd probably use analyze-string rather than an explicit recursion
to do the lookup.  (I think both approaches may fail to find the
longest possible match, as an earlier match may obscure a possible
later, longer match.)

Something like:

<xsl:stylesheet
    version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:my="http://xmlns.srz.de/yforkl/xslt/functions"
    exclude-result-prefixes="my xs">

  <!--
    Demonstration stylesheet: orders a list of regex substitution
    instructions by the length of the longest substring each regex
    matches in a sample input string, drops the instructions that match
    nothing, and reports the result via xsl:message.

    Run with the named template "main" as the entry point.
  -->

  <!-- using dashes in function names, underscores in variable names -->


  <!--
    my:max-match-length($text, $regex)

    Returns the length of the longest substring of $text reported as a
    match of $regex by xsl:analyze-string, or 0 when there is no match.

    The sequence is seeded with 0 so that max() always has at least one
    item to work on.  Matches are the non-overlapping, left-to-right ones
    that analyze-string produces, so an earlier match may hide a longer
    match that starts inside it.

    Note: per the XSLT 2.0 specification a regex that can match the
    zero-length string is a dynamic error in xsl:analyze-string.
  -->
  <xsl:function name="my:max-match-length" as="xs:integer">
    <xsl:param name="text" as="xs:string"/>
    <xsl:param name="regex" as="xs:string"/>
    <xsl:variable name="lengths" as="xs:integer+">
      <xsl:sequence select="0"/>
      <xsl:analyze-string select="$text" regex="{$regex}">
        <xsl:matching-substring>
          <xsl:sequence select="string-length(.)"/>
        </xsl:matching-substring>
      </xsl:analyze-string>
    </xsl:variable>
    <xsl:sequence select="max($lengths)"/>
  </xsl:function>


  <xsl:template name="main">

    <!-- sample data: the string the regexes are tried against -->
    <xsl:variable name="input"
                  select="'abcddddxxxxxxxyyyabcxxxxxabcdxabc'"/>

    <!-- sample data: each substitution pairs a regex (old) with its
         replacement text (new) -->
    <xsl:variable name="substitution_instructions">
      <substitution>
        <old>[a-e]+</old>
        <new>***</new>
      </substitution>
      <substitution>
        <old>d</old>
        <new>#</new>
      </substitution>
      <substitution>
        <old>123</old>
        <new>...</new>
      </substitution>
      <substitution>
        <old>c+</old>
        <new>#</new>
      </substitution>
      <substitution>
        <old>x+y*</old>
        <new>+++</new>
      </substitution>
    </xsl:variable>

    <!-- The instructions that match at least once, longest match first.
         Each copy gains a max_match_length child ahead of the original
         old/new children. -->
    <xsl:variable name="substitution_instructions_sorted">
      <xsl:for-each select="$substitution_instructions/*">
        <!-- The sort key is an xs:integer, so the comparison is numeric. -->
        <xsl:sort select="my:max-match-length($input, old)"
                  order="descending"/>
        <xsl:variable name="max_match_length" as="xs:integer"
                      select="my:max-match-length($input, old)"/>
        <!-- 0 means the regex never matched: leave the instruction out -->
        <xsl:if test="$max_match_length != 0">
          <substitution>
            <max_match_length>
              <xsl:value-of select="$max_match_length"/>
            </max_match_length>
            <xsl:copy-of select="*"/>
          </substitution>
        </xsl:if>
      </xsl:for-each>
    </xsl:variable>

    <!-- Report the input and, per retained instruction, its regex and
         longest match length. -->
    <xsl:message>
      <xsl:value-of
          select="concat('input: ', $input, '&#10;')"/>
      <xsl:value-of
          select="'========================================&#10;'"/>
      <xsl:for-each
          select="$substitution_instructions_sorted/substitution">
        <xsl:value-of
            select="concat('regex: ', old, '&#10;')"/>
        <xsl:value-of
            select="concat('max. match length: ', max_match_length,
                    '&#10;')"/>
        <xsl:value-of
            select="'----------------------------------------&#10;'"/>
      </xsl:for-each>
    </xsl:message>

  </xsl:template>

</xsl:stylesheet>

$ saxon8 -it main regexlength.xsl~ 
input: abcddddxxxxxxxyyyabcxxxxxabcdxabc
========================================
regex: x+y*
max. match length: 10
----------------------------------------
regex: [a-e]+
max. match length: 7
----------------------------------------
regex: c+
max. match length: 1
----------------------------------------
regex: d
max. match length: 1
----------------------------------------

________________________________________________________________________
The Numerical Algorithms Group Ltd is a company registered in England
and Wales with company number 1249803. The registered office is:
Wilkinson House, Jordan Hill Road, Oxford OX2 8DR, United Kingdom.

This e-mail has been scanned for all viruses by Star. The service is
powered by MessageLabs. 
________________________________________________________________________

--~------------------------------------------------------------------
XSL-List info and archive:  http://www.mulberrytech.com/xsl/xsl-list
To unsubscribe, go to: http://lists.mulberrytech.com/xsl-list/
or e-mail: <mailto:xsl-list-unsubscribe(_at_)lists(_dot_)mulberrytech(_dot_)com>
--~--

<Prev in Thread] Current Thread [Next in Thread>