Hi List,
I am re-posting this query as a fresh thread as earlier thread is creating
confusion.
I want to structured a plain html document into a structdured document.
Martin has provided a partial logic and we have modified it. Now we have two
functions (jsr:group and mf:group). We are converting same output by
applying both the functions but both of these functions have logical errors
which is not easily visible by middle level xslt developer. I request to the
whole xslt community to please have a look on both the functions and suggest
the correctness. It is looking a more challanging one. Input, output and
xslt are provided below.
***************
XSLT
***************
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xd="http://www.pnp-software.com/XSLTdoc"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:jsr="http://aptara.com/2009/jsr"
xmlns:mf="http://example.com/2009/mf"
exclude-result-prefixes="xs xd mf jsr xsd"
version="2.0">
<xsl:output method="xml" indent="yes"/>
<!--Modified version of Logic provided by Martin Honnen -->
<xsl:function name="jsr:group" as="node()*">
<xsl:param name="elements" as="element()*"/>
<xsl:param name="level" as="xsd:integer"/>
<xsl:for-each-group select="$elements"
group-starting-with="p[(_at_)class=concat('Head', $level) or
@class=concat('head', $level)]">
<xsl:if test="self::p[matches(@class,'^Head[0-9]+$') or
matches(@class,'^head[0-9]+$')]">
<xsl:text
disable-output-escaping="yes"><![CDATA[<sc]]></xsl:text><xsl:value-of
select="$level"/>
<xsl:text disable-output-escaping="yes"><![CDATA[>]]></xsl:text>
</xsl:if>
<xsl:variable name="current-head" as="element()?"
select="self::p[(_at_)class=concat('Head', $level) or @class=concat('head',
$level)]"/>
<xsl:apply-templates select="$current-head"/>
<xsl:variable name="max-level" select="max(for $h in
current-group()[self::p[matches(@class,'^Head[0-9]+$')] or
self::p[matches(@class,'^head[0-9]+$')]] return
xsd:integer(substring($h/@class, 5)))"/>
<xsl:variable name="next-head" as="element()?"
select="current-group()[self::p[matches(@class,'^Head[0-9]+$')] or
self::p[matches(@class,'^head[0-9]+$')]][1]"/>
<xsl:variable name="v1" as="element()*" select="current-group()[.
<< $next-head] except $current-head"/>
<xsl:apply-templates select="$v1"/>
<xsl:choose>
<xsl:when test="$level lt $max-level">
<xsl:sequence select="jsr:group(current-group() except ($current-head,
$v1), $level + 1)"/>
</xsl:when>
<xsl:when test="$level eq $max-level">
<xsl:apply-templates select="current-group() except $current-head"/>
</xsl:when>
<xsl:otherwise>
<xsl:apply-templates select="current-group() except $current-head"/>
</xsl:otherwise>
</xsl:choose>
<xsl:if test="self::p[matches(@class,'^Head[0-9]+$') or
matches(@class,'^head[0-9]+$')]">
<xsl:text
disable-output-escaping="yes"><![CDATA[</sc]]></xsl:text><xsl:value-of
select="$level"/>
<xsl:text disable-output-escaping="yes"><![CDATA[>]]></xsl:text>
</xsl:if>
</xsl:for-each-group>
</xsl:function>
<!--Logic provided by Martin Honnen -->
<xsl:function name="mf:group" as="node()*">
<xsl:param name="elements" as="element()*"/>
<xsl:param name="level" as="xsd:integer"/>
<xsl:for-each-group select="$elements"
group-starting-with="p[(_at_)class=concat('Head', $level) or
@class=concat('head', $level)]">
<xsl:element name="sc{$level}">
<xsl:variable name="current-head" as="element()?"
select="self::p[(_at_)class=concat('Head', $level) or @class=concat('head',
$level)]"/>
<xsl:apply-templates select="$current-head"/>
<xsl:variable name="max-level" select="max(for $h in
current-group()[self::p[matches(@class,'^Head[0-9]+$')] or
self::p[matches(@class,'^head[0-9]+$')]] return
xsd:integer(substring($h/@class, 5)))"/>
<xsl:variable name="next-head" as="element()?"
select="current-group()[self::p[matches(@class,'^Head[0-9]+$')] or
self::p[matches(@class,'^head[0-9]+$')]][1]"/>
<xsl:variable name="v1" as="element()*" select="current-group()[.
<< $next-head] except $current-head"/>
<xsl:apply-templates select="$v1"/>
<xsl:choose>
<xsl:when test="$level lt $max-level">
<xsl:sequence select="mf:group(current-group() except ($current-head,
$v1), $level + 1)"/>
</xsl:when>
<xsl:when test="$level eq $max-level">
<xsl:apply-templates select="current-group() except $current-head"/>
</xsl:when>
<xsl:otherwise>
<xsl:apply-templates select="current-group() except $current-head"/>
</xsl:otherwise>
</xsl:choose>
</xsl:element>
</xsl:for-each-group>
</xsl:function>
<xsl:template match="html">
<doc>
<xsl:apply-templates select="body"/>
</doc>
</xsl:template>
<xsl:template match="body">
<xsl:apply-templates/>
</xsl:template>
<xsl:template match="div[(_at_)class='bdy']">
<bdy>
<xsl:variable name="v1" as="element()*" select="*[. <<
(current()/p[(_at_)class='Head1' or @class='head1' or @class='Head2' or
@class='head2' or @class='Head3' or @class='head3' or @class='Head4' or
@class='head4' or @class='Head5' or @class='head5' or @class='Head6' or
@class='head6'])[1]]"/>
<xsl:apply-templates select="$v1"/>
<xsl:sequence select="jsr:group(* except $v1, 1)"/>
</bdy>
</xsl:template>
<xsl:template match="div[(_at_)class='bdy1']">
<bdy1>
<xsl:variable name="v1" as="element()*" select="*[. <<
(current()/p[(_at_)class='Head1' or @class='head1' or @class='Head2' or
@class='head2' or @class='Head3' or @class='head3' or @class='Head4' or
@class='head4' or @class='Head5' or @class='head5' or @class='Head6' or
@class='head6'])[1]]"/>
<xsl:apply-templates select="$v1"/>
<xsl:sequence select="mf:group(* except $v1, 1)"/>
</bdy1>
</xsl:template>
<xsl:template match="p[matches(@class, '^Head[0-9]+$')] | p[matches(@class,
'^head[0-9]+$')]">
<ti>
<xsl:apply-templates/>
</ti>
</xsl:template>
<xsl:template match="p">
<p>
<xsl:apply-templates/>
</p>
</xsl:template>
</xsl:stylesheet>
***************
Input
***************
<html>
<body>
<div class="bdy">
<p>Para i</p>
<p class="head6">Head6</p>
<p>Para ii</p>
<p>para iii</p>
<p class="head3">Head3</p>
<p>Para 01</p>
<p class="head1">Head1</p>
<p>Para 02</p>
<p>para 03</p>
<p class="head1">Introduction Head1</p>
<p>Para 04</p>
<p>para 05</p>
<p class="head2">Head 2</p>
<p class="head3">Head 3</p>
<p>Para 06</p>
<p>para 07</p>
<p class="head2">Head 2</p>
<p>Para 08</p>
<p>para 09</p>
<p class="head3">Head 3</p>
<p>Para 10</p>
<p>para 11</p>
<p class="head4">Head 4</p>
<p>Para 12</p>
<p>para 13</p>
<p class="head5">Head 5</p>
<p>Para 14</p>
<p>para 15</p>
<p class="head6">Head 6</p>
<p>Para 16</p>
<p>para 17</p>
<p class="head2">Head 2</p>
<p>Para 18</p>
<p>para 19</p>
<p class="head5">Head 5</p>
<p>Para 20</p>
<p>para 21</p>
<p class="head3">Head 3</p>
<p class="head4">Head 4</p>
<p>Para 22</p>
<p>para 23</p>
<p class="Head1">Head1</p>
<p>Para 24</p>
<p>para 25</p>
<p class="Head3">Head3</p>
<p>Para 26</p>
<p>Para 27</p>
<p class="Head1">Head1</p>
<p>Para 28</p>
<p>para 29</p>
</div>
<div class="bdy1">
<p>Para i</p>
<p class="head6">Head6</p>
<p>Para ii</p>
<p>para iii</p>
<p class="head3">Head3</p>
<p>Para 01</p>
<p class="head1">Head1</p>
<p>Para 02</p>
<p>para 03</p>
<p class="head1">Introduction Head1</p>
<p>Para 04</p>
<p>para 05</p>
<p class="head2">Head 2</p>
<p class="head3">Head 3</p>
<p>Para 06</p>
<p>para 07</p>
<p class="head2">Head 2</p>
<p>Para 08</p>
<p>para 09</p>
<p class="head3">Head 3</p>
<p>Para 10</p>
<p>para 11</p>
<p class="head4">Head 4</p>
<p>Para 12</p>
<p>para 13</p>
<p class="head5">Head 5</p>
<p>Para 14</p>
<p>para 15</p>
<p class="head6">Head 6</p>
<p>Para 16</p>
<p>para 17</p>
<p class="head2">Head 2</p>
<p>Para 18</p>
<p>para 19</p>
<p class="head5">Head 5</p>
<p>Para 20</p>
<p>para 21</p>
<p class="head3">Head 3</p>
<p class="head4">Head 4</p>
<p>Para 22</p>
<p>para 23</p>
<p class="Head1">Head1</p>
<p>Para 24</p>
<p>para 25</p>
<p class="Head3">Head3</p>
<p>Para 26</p>
<p>Para 27</p>
<p class="Head1">Head1</p>
<p>Para 28</p>
<p>para 29</p>
</div>
</body>
</html>
****************
Required OUTPUT
****************
<doc>
<bdy>
<p>Para i</p>
<sc1>
<sc2>
<sc3>
<sc4>
<sc5>
<sc6>
<ti>Head6</ti>
<p>Para ii</p>
<p>para iii</p>
</sc6>
</sc5>
</sc4>
</sc3>
<sc3>
<ti>Head3</ti>
<p>Para 01</p>
</sc3>
</sc2>
</sc1>
<sc1>
<ti>Head1</ti>
<p>Para 02</p>
<p>para 03</p>
</sc1>
<sc1>
<ti>Introduction Head1</ti>
<p>Para 04</p>
<p>para 05</p>
<sc2>
<ti>Head 2</ti>
<sc3>
<ti>Head 3</ti>
<p>Para 06</p>
<p>para 07</p>
</sc3>
</sc2>
<sc2>
<ti>Head 2</ti>
<p>Para 08</p>
<p>para 09</p>
<sc3>
<ti>Head 3</ti>
<p>Para 10</p>
<p>para 11</p>
<sc4>
<ti>Head 4</ti>
<p>Para 12</p>
<p>para 13</p>
<sc5>
<ti>Head 5</ti>
<p>Para 14</p>
<p>para 15</p>
<sc6>
<ti>Head 6</ti>
<p>Para 16</p>
<p>para 17</p>
</sc6>
</sc5>
</sc4>
</sc3>
</sc2>
<sc2>
<ti>Head 2</ti>
<p>Para 18</p>
<p>para 19</p>
<sc4>
<sc5>
<ti>Head 5</ti>
<p>Para 20</p>
<p>para 21</p>
</sc5>
</sc4>
</sc3>
<sc3>
<ti>Head 3</ti>
<sc4>
<ti>Head 4</ti>
<p>Para 22</p>
<p>para 23</p>
</sc4>
</sc3>
</sc2>
</sc1>
<sc1>
<ti>Head1</ti>
<p>Para 24</p>
<p>para 25</p>
<sc2>
<sc3>
<ti>Head3</ti>
<p>Para 26</p>
<p>Para 27</p>
</sc3>
</sc2>
</sc1>
<sc1>
<ti>Head1</ti>
<p>Para 28</p>
<p>para 29</p>
</sc1>
</bdy>
</doc>
--~------------------------------------------------------------------
XSL-List info and archive: http://www.mulberrytech.com/xsl/xsl-list
To unsubscribe, go to: http://lists.mulberrytech.com/xsl-list/
or e-mail: <mailto:xsl-list-unsubscribe(_at_)lists(_dot_)mulberrytech(_dot_)com>
--~--