xsl-list
[Top] [All Lists]

[xsl] Group Techniques Logic

2009-12-07 04:31:50
Hi List,
I am re-posting this query as a fresh thread as earlier thread is creating
confusion. 

I want to structured a plain html document into a structdured document.
Martin has provided a partial logic and we have modified it. Now we have two
functions (jsr:group and mf:group). We are converting same output by
applying both the functions but both of these functions have logical errors
which is not easily visible by middle level xslt developer. I request to the
whole xslt community to please have a look on both the functions and suggest
the correctness. It is looking a more challanging one. Input, output and
xslt are provided below.


***************
XSLT
***************
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform";
                xmlns:xd="http://www.pnp-software.com/XSLTdoc";
                xmlns:xs="http://www.w3.org/2001/XMLSchema"; 
                xmlns:xsd="http://www.w3.org/2001/XMLSchema";
                xmlns:jsr="http://aptara.com/2009/jsr";
                xmlns:mf="http://example.com/2009/mf";
                exclude-result-prefixes="xs xd mf jsr xsd" 
                version="2.0">
<xsl:output method="xml" indent="yes"/>

<!--Modified version of Logic provided by Martin Honnen -->
<xsl:function name="jsr:group" as="node()*">
 <xsl:param name="elements" as="element()*"/>
 <xsl:param name="level" as="xsd:integer"/>
 <xsl:for-each-group select="$elements"
group-starting-with="p[(_at_)class=concat('Head', $level) or
@class=concat('head', $level)]">
  <xsl:if test="self::p[matches(@class,'^Head[0-9]+$') or
matches(@class,'^head[0-9]+$')]">
   <xsl:text
disable-output-escaping="yes"><![CDATA[<sc]]></xsl:text><xsl:value-of
select="$level"/>
   <xsl:text disable-output-escaping="yes"><![CDATA[>]]></xsl:text>
  </xsl:if>
   <xsl:variable name="current-head" as="element()?"
select="self::p[(_at_)class=concat('Head', $level) or @class=concat('head',
$level)]"/>
   <xsl:apply-templates select="$current-head"/>
   <xsl:variable name="max-level" select="max(for $h in
current-group()[self::p[matches(@class,'^Head[0-9]+$')] or
self::p[matches(@class,'^head[0-9]+$')]] return
xsd:integer(substring($h/@class, 5)))"/>
   <xsl:variable name="next-head" as="element()?"
select="current-group()[self::p[matches(@class,'^Head[0-9]+$')] or
self::p[matches(@class,'^head[0-9]+$')]][1]"/>
   <xsl:variable name="v1" as="element()*" select="current-group()[.
&lt;&lt; $next-head] except $current-head"/>
   <xsl:apply-templates select="$v1"/>
   <xsl:choose>
    <xsl:when test="$level lt $max-level">
     <xsl:sequence select="jsr:group(current-group() except ($current-head,
$v1), $level + 1)"/>
    </xsl:when>
    <xsl:when test="$level eq $max-level">
     <xsl:apply-templates select="current-group() except $current-head"/>
    </xsl:when>
    <xsl:otherwise>
     <xsl:apply-templates select="current-group() except $current-head"/>
    </xsl:otherwise>
   </xsl:choose>
  <xsl:if test="self::p[matches(@class,'^Head[0-9]+$') or
matches(@class,'^head[0-9]+$')]">
   <xsl:text
disable-output-escaping="yes"><![CDATA[</sc]]></xsl:text><xsl:value-of
select="$level"/>
   <xsl:text disable-output-escaping="yes"><![CDATA[>]]></xsl:text>
  </xsl:if>
 </xsl:for-each-group>
</xsl:function>

<!--Logic provided by Martin Honnen -->
<xsl:function name="mf:group" as="node()*">
 <xsl:param name="elements" as="element()*"/>
 <xsl:param name="level" as="xsd:integer"/>
 <xsl:for-each-group select="$elements"
group-starting-with="p[(_at_)class=concat('Head', $level) or
@class=concat('head', $level)]">
  <xsl:element name="sc{$level}">
   <xsl:variable name="current-head" as="element()?"
select="self::p[(_at_)class=concat('Head', $level) or @class=concat('head',
$level)]"/>
   <xsl:apply-templates select="$current-head"/>
   <xsl:variable name="max-level" select="max(for $h in
current-group()[self::p[matches(@class,'^Head[0-9]+$')] or
self::p[matches(@class,'^head[0-9]+$')]] return
xsd:integer(substring($h/@class, 5)))"/>
   <xsl:variable name="next-head" as="element()?"
select="current-group()[self::p[matches(@class,'^Head[0-9]+$')] or
self::p[matches(@class,'^head[0-9]+$')]][1]"/>
   <xsl:variable name="v1" as="element()*" select="current-group()[.
&lt;&lt; $next-head] except $current-head"/>
   <xsl:apply-templates select="$v1"/>
   <xsl:choose>
    <xsl:when test="$level lt $max-level">
     <xsl:sequence select="mf:group(current-group() except ($current-head,
$v1), $level + 1)"/>
    </xsl:when>
    <xsl:when test="$level eq $max-level">
     <xsl:apply-templates select="current-group() except $current-head"/>
    </xsl:when>
    <xsl:otherwise>
     <xsl:apply-templates select="current-group() except $current-head"/>
    </xsl:otherwise>
   </xsl:choose>
  </xsl:element>
 </xsl:for-each-group>
</xsl:function>

<xsl:template match="html">
 <doc>
  <xsl:apply-templates select="body"/>
 </doc>
</xsl:template>

<xsl:template match="body">
 <xsl:apply-templates/>
</xsl:template>

<xsl:template match="div[(_at_)class='bdy']">
 <bdy>
  <xsl:variable name="v1" as="element()*" select="*[. &lt;&lt;
(current()/p[(_at_)class='Head1' or @class='head1' or @class='Head2' or
@class='head2' or @class='Head3' or @class='head3' or @class='Head4' or
@class='head4' or @class='Head5' or @class='head5' or @class='Head6' or
@class='head6'])[1]]"/>
  <xsl:apply-templates select="$v1"/>
  <xsl:sequence select="jsr:group(* except $v1, 1)"/>
 </bdy>
</xsl:template>

<xsl:template match="div[(_at_)class='bdy1']">
 <bdy1>
  <xsl:variable name="v1" as="element()*" select="*[. &lt;&lt;
(current()/p[(_at_)class='Head1' or @class='head1' or @class='Head2' or
@class='head2' or @class='Head3' or @class='head3' or @class='Head4' or
@class='head4' or @class='Head5' or @class='head5' or @class='Head6' or
@class='head6'])[1]]"/>
  <xsl:apply-templates select="$v1"/>
  <xsl:sequence select="mf:group(* except $v1, 1)"/>
 </bdy1>
</xsl:template>

<xsl:template match="p[matches(@class, '^Head[0-9]+$')] | p[matches(@class,
'^head[0-9]+$')]">
 <ti>
  <xsl:apply-templates/>
 </ti>
</xsl:template>

<xsl:template match="p">
 <p>
   <xsl:apply-templates/>
 </p>
</xsl:template>

</xsl:stylesheet>


***************
Input
***************
<html>
<body>
<div class="bdy">
  <p>Para i</p>
  <p class="head6">Head6</p>
  <p>Para ii</p>
  <p>para iii</p>
  <p class="head3">Head3</p>
  <p>Para 01</p>
  <p class="head1">Head1</p>
  <p>Para 02</p>
  <p>para 03</p>
  <p class="head1">Introduction Head1</p>
  <p>Para 04</p>
  <p>para 05</p>
  <p class="head2">Head 2</p>
  <p class="head3">Head 3</p>
  <p>Para 06</p>
  <p>para 07</p>
  <p class="head2">Head 2</p>
  <p>Para 08</p>
  <p>para 09</p>
  <p class="head3">Head 3</p>
  <p>Para 10</p>
  <p>para 11</p>
  <p class="head4">Head 4</p>
  <p>Para 12</p>
  <p>para 13</p>
  <p class="head5">Head 5</p>
  <p>Para 14</p>
  <p>para 15</p>
  <p class="head6">Head 6</p>
  <p>Para 16</p>
  <p>para 17</p>
  <p class="head2">Head 2</p>
  <p>Para 18</p>
  <p>para 19</p>
  <p class="head5">Head 5</p>
  <p>Para 20</p>
  <p>para 21</p>
  <p class="head3">Head 3</p>
  <p class="head4">Head 4</p>
  <p>Para 22</p>
  <p>para 23</p>
  <p class="Head1">Head1</p>
  <p>Para 24</p>
  <p>para 25</p>
  <p class="Head3">Head3</p>
  <p>Para 26</p>
  <p>Para 27</p>
  <p class="Head1">Head1</p>
  <p>Para 28</p>
  <p>para 29</p>
</div>
<div class="bdy1">
  <p>Para i</p>
  <p class="head6">Head6</p>
  <p>Para ii</p>
  <p>para iii</p>
  <p class="head3">Head3</p>
  <p>Para 01</p>
  <p class="head1">Head1</p>
  <p>Para 02</p>
  <p>para 03</p>
  <p class="head1">Introduction Head1</p>
  <p>Para 04</p>
  <p>para 05</p>
  <p class="head2">Head 2</p>
  <p class="head3">Head 3</p>
  <p>Para 06</p>
  <p>para 07</p>
  <p class="head2">Head 2</p>
  <p>Para 08</p>
  <p>para 09</p>
  <p class="head3">Head 3</p>
  <p>Para 10</p>
  <p>para 11</p>
  <p class="head4">Head 4</p>
  <p>Para 12</p>
  <p>para 13</p>
  <p class="head5">Head 5</p>
  <p>Para 14</p>
  <p>para 15</p>
  <p class="head6">Head 6</p>
  <p>Para 16</p>
  <p>para 17</p>
  <p class="head2">Head 2</p>
  <p>Para 18</p>
  <p>para 19</p>
  <p class="head5">Head 5</p>
  <p>Para 20</p>
  <p>para 21</p>
  <p class="head3">Head 3</p>
  <p class="head4">Head 4</p>
  <p>Para 22</p>
  <p>para 23</p>
  <p class="Head1">Head1</p>
  <p>Para 24</p>
  <p>para 25</p>
  <p class="Head3">Head3</p>
  <p>Para 26</p>
  <p>Para 27</p>
  <p class="Head1">Head1</p>
  <p>Para 28</p>
  <p>para 29</p>
</div>
</body>
</html> 

****************
Required OUTPUT
****************
<doc>
   <bdy>
      <p>Para i</p>
      <sc1>
         <sc2>
            <sc3>
               <sc4>
                  <sc5>
                     <sc6>
                        <ti>Head6</ti>
                        <p>Para ii</p>
                        <p>para iii</p>
                     </sc6>
                  </sc5>
               </sc4>
            </sc3>
            <sc3>
               <ti>Head3</ti>
               <p>Para 01</p>
            </sc3>
         </sc2>
      </sc1>
      <sc1>
         <ti>Head1</ti>
         <p>Para 02</p>
         <p>para 03</p>
      </sc1>
      <sc1>
         <ti>Introduction Head1</ti>
         <p>Para 04</p>
         <p>para 05</p>
         <sc2>
            <ti>Head 2</ti>
            <sc3>
               <ti>Head 3</ti>
               <p>Para 06</p>
               <p>para 07</p>
            </sc3>
         </sc2>
         <sc2>
            <ti>Head 2</ti>
            <p>Para 08</p>
            <p>para 09</p>
            <sc3>
               <ti>Head 3</ti>
               <p>Para 10</p>
               <p>para 11</p>
               <sc4>
                  <ti>Head 4</ti>
                  <p>Para 12</p>
                  <p>para 13</p>
                  <sc5>
                     <ti>Head 5</ti>
                     <p>Para 14</p>
                     <p>para 15</p>
                     <sc6>
                        <ti>Head 6</ti>
                        <p>Para 16</p>
                        <p>para 17</p>
                     </sc6>
                  </sc5>
               </sc4>
            </sc3>
         </sc2>
         <sc2>
            <ti>Head 2</ti>
               <p>Para 18</p>
               <p>para 19</p>
               <sc4>
                  <sc5>
                     <ti>Head 5</ti>
                     <p>Para 20</p>
                     <p>para 21</p>
                  </sc5>
               </sc4>
            </sc3>
            <sc3>
               <ti>Head 3</ti>
               <sc4>
                  <ti>Head 4</ti>
                  <p>Para 22</p>
                  <p>para 23</p>
               </sc4>
            </sc3>
         </sc2>
      </sc1>
      <sc1>
         <ti>Head1</ti>
         <p>Para 24</p>
         <p>para 25</p>
         <sc2>
            <sc3>
              <ti>Head3</ti>
              <p>Para 26</p>
              <p>Para 27</p>
            </sc3>
         </sc2>
      </sc1>
      <sc1>
         <ti>Head1</ti>
         <p>Para 28</p>
         <p>para 29</p>
      </sc1>
   </bdy>
</doc>


--~------------------------------------------------------------------
XSL-List info and archive:  http://www.mulberrytech.com/xsl/xsl-list
To unsubscribe, go to: http://lists.mulberrytech.com/xsl-list/
or e-mail: <mailto:xsl-list-unsubscribe(_at_)lists(_dot_)mulberrytech(_dot_)com>
--~--

<Prev in Thread] Current Thread [Next in Thread>