J'ai besoin de votre aide sur un fichier XSLT car je suis assez novice dans l'utilisation de ce type de fichiers pour formater XML.
En entrée, j'ai le XML suivant
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
| <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<alto xmlns="http://www.loc.gov/standards/alto/ns-v3#" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-1.xsd">
<Description>
<MeasurementUnit>pixel</MeasurementUnit>
<OCRProcessing ID="IdOcr"><ocrProcessingStep><processingDateTime>2020-12-08</processingDateTime><processingSoftware><softwareCreator>ABCDE</softwareCreator><softwareName>ABCDE Engine</softwareName><softwareVersion>12</softwareVersion></processingSoftware></ocrProcessingStep></OCRProcessing>
</Description>
<Styles><TextStyle ID="font0" FONTFAMILY="Arial" FONTSIZE="10"/><TextStyle ID="font1" FONTFAMILY="Arial" FONTSIZE="11"/><TextStyle ID="font2" FONTFAMILY="Calibri" FONTSIZE="11"/><TextStyle ID="font3" FONTFAMILY="Cambria" FONTSIZE="11"/><TextStyle ID="font4" FONTFAMILY="Symbol" FONTSIZE="10"/><TextStyle ID="font5" FONTFAMILY="Symbol" FONTSIZE="11"/>
</Styles>
<Layout>
<Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="3508" WIDTH="2481">
<TopMargin HEIGHT="300" WIDTH="2481" VPOS="0" HPOS="0">
<TextBlock ID="Page1_Block1" HEIGHT="58" WIDTH="465" VPOS="150" HPOS="1008" LANG="en-US" STYLEREFS="font2">
<TextLine HEIGHT="46" WIDTH="453" VPOS="156" HPOS="1014"><String STYLE="bold" WC="1." CONTENT="LOREUM" HEIGHT="46" WIDTH="167" VPOS="156" HPOS="1014">LOREUM</String><SP HEIGHT="46" WIDTH="8" VPOS="156" HPOS="1182"/><String STYLE="bold" WC="1." CONTENT="IPSUM" HEIGHT="46" WIDTH="276" VPOS="156" HPOS="1191">IPSUM</String></TextLine>
</TextBlock>
</TopMargin>
<LeftMargin HEIGHT="2902" WIDTH="219" VPOS="300" HPOS="0">
</LeftMargin>
<RightMargin HEIGHT="2902" WIDTH="219" VPOS="300" HPOS="2262">
</RightMargin>
<BottomMargin HEIGHT="306" WIDTH="2481" VPOS="3202" HPOS="0">
<ComposedBlock ID="Page1_Block2" HEIGHT="58" WIDTH="1297" VPOS="3253" HPOS="965" TYPE="container">
<TextBlock ID="Page1_Block3" HEIGHT="51" WIDTH="410" VPOS="3259" HPOS="965" LANG="en-US" STYLEREFS="font2">
<TextLine HEIGHT="39" WIDTH="398" VPOS="3265" HPOS="971"><String STYLE="bold" STYLEREFS="font0" WC="1." CONTENT="dolor" HEIGHT="39" WIDTH="146" VPOS="3265" HPOS="971">dolor</String><SP HEIGHT="39" WIDTH="9" VPOS="3265" HPOS="1118"/><String STYLE="bold" STYLEREFS="font0" WC="1." CONTENT="sit" HEIGHT="39" WIDTH="241" VPOS="3265" HPOS="1128">sit</String></TextLine>
</TextBlock>
<TextBlock ID="Page1_Block4" HEIGHT="58" WIDTH="141" VPOS="3253" HPOS="2121" LANG="en-US" STYLEREFS="font2">
<TextLine HEIGHT="46" WIDTH="129" VPOS="3259" HPOS="2127"><String STYLEREFS="font3" WC="1." CONTENT="Page" HEIGHT="46" WIDTH="94" VPOS="3259" HPOS="2127">Page</String><SP HEIGHT="46" WIDTH="8" VPOS="3259" HPOS="2222"/><String STYLEREFS="font3" WC="1." CONTENT="1" HEIGHT="46" WIDTH="25" VPOS="3259" HPOS="2231">1</String></TextLine>
</TextBlock></ComposedBlock><GraphicalElement ID="Page1_Block5" HEIGHT="14" WIDTH="2044" VPOS="3228" HPOS="218"/><GraphicalElement ID="Page1_Block6" HEIGHT="4" WIDTH="2044" VPOS="3244" HPOS="218"/>
</BottomMargin>
<PrintSpace HEIGHT="2902" WIDTH="2043" VPOS="300" HPOS="219">
<TextBlock ID="Page1_Block7" HEIGHT="124" WIDTH="898" VPOS="303" HPOS="791" LANG="en-US" STYLEREFS="font0">
<TextLine HEIGHT="39" WIDTH="886" VPOS="309" HPOS="797"><String STYLE="bold" WC="1." CONTENT="amet" HEIGHT="39" WIDTH="183" VPOS="309" HPOS="797">amet</String><SP HEIGHT="39" WIDTH="9" VPOS="309" HPOS="981"/><String STYLE="bold" WC="1." CONTENT="consectetur " HEIGHT="39" WIDTH="236" VPOS="309" HPOS="991">consectetur </String><SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1228"/><String STYLE="bold" WC="1." CONTENT="adipiscing " HEIGHT="39" WIDTH="58" VPOS="309" HPOS="1239">adipiscing </String><SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1298"/><String STYLE="bold" WC="1." CONTENT="elit" HEIGHT="39" WIDTH="374" VPOS="309" HPOS="1309">elit</String></TextLine>
<TextLine HEIGHT="39" WIDTH="252" VPOS="382" HPOS="1108"><String STYLE="bold" WC="1." CONTENT="Aliquam " HEIGHT="39" WIDTH="203" VPOS="382" HPOS="1108">Aliquam </String><SP HEIGHT="39" WIDTH="10" VPOS="382" HPOS="1312"/><String STYLE="bold" WC="1." CONTENT="eu" HEIGHT="39" WIDTH="37" VPOS="382" HPOS="1323">eu</String></TextLine>
</TextBlock>
</PrintSpace>
</Page>
</Layout>
</alto> |
J'essaie d'y appliquer plusieurs choses, mais mon problème principal est que j'aimerais que mes balises textline contiennent des balises string et sp sur une seule ligne sans indentation. Pour mes balises textline, par exemple, j'ai ceci en sortie :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
| <TextLine/>
<String STYLE="bold"
WC="1."
CONTENT="amet"
HEIGHT="39"
WIDTH="183"
VPOS="309"
HPOS="797">amet</String>
<String STYLE="bold"
WC="1."
CONTENT="consectetur "
HEIGHT="39"
WIDTH="236"
VPOS="309"
HPOS="991">consectetur </String>
<String STYLE="bold"
WC="1."
CONTENT="adipiscing "
HEIGHT="39"
WIDTH="58"
VPOS="309"
HPOS="1239">adipiscing </String>
<String STYLE="bold"
WC="1."
CONTENT="elit"
HEIGHT="39"
WIDTH="374"
VPOS="309"
HPOS="1309">elit</String>
<SP HEIGHT="39" WIDTH="9" VPOS="309" HPOS="981"/>
<SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1228"/>
<SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1298"/>
</TextLine> |
Je souhaiterai obtenir ceci:
<TextLine/> <String STYLE="bold" WC="1." CONTENT="amet" HEIGHT="39" WIDTH="183" VPOS="309" HPOS="797">amet</String><SP HEIGHT="39" WIDTH="9" VPOS="309" HPOS="981"/><String STYLE="bold" WC="1." CONTENT="consectetur " HEIGHT="39" WIDTH="236" VPOS="309" HPOS="991">consectetur </String><SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1228"/><String STYLE="bold" WC="1." CONTENT="adipiscing " HEIGHT="39" WIDTH="58" VPOS="309" HPOS="1239">adipiscing </String><SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1298"/> <String STYLE="bold" WC="1." CONTENT="elit" HEIGHT="39" WIDTH="374" VPOS="309" HPOS="1309">elit</String> </TextLine>
Sauriez-vous ce que je suis censé modifier/supprimer/ajouter dans ce xsl pour y parvenir ?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
| <?xml version="1.0" encoding="utf-8"?>
<!-- Created with Liquid Studio 2019 (https://www.liquid-technologies.com) -->
<xsl:stylesheet version="1.1" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tns="http://www.loc.gov/standards/alto/ns-v3#" >
<xsl:output indent="yes"/>
<xsl:key name="TextBlock-by-LANG" match="//tns:TextBlock" use="@LANG" />
<xsl:variable name="lang">
<xsl:for-each select="//tns:TextBlock">
<xsl:sort select="count(key('TextBlock-by-LANG', @LANG))" data-type="number" order="descending"/>
<xsl:if test="position() = 1">
<xsl:value-of select="@LANG"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<xsl:template match="/">
<alto>
<xsl:apply-templates select="//tns:Description"/>
<xsl:apply-templates select="//tns:Styles"/>
<xsl:apply-templates select="//tns:Page">
<xsl:sort select="@PHYSICAL_IMG_NR" data-type="number" order="ascending"/>
</xsl:apply-templates>
</alto>
</xsl:template>
<xsl:template match="tns:Description">
<xsl:copy-of select="."/>
</xsl:template>
<xsl:template match="tns:Styles">
<xsl:copy-of select="."/>
</xsl:template>
<xsl:template match="tns:Page">
<xsl:copy>
<xsl:copy-of select="@*"/>
<xsl:apply-templates select="tns:PrintSpace//tns:TextBlock">
<xsl:sort select="@VPOS" data-type="number" order="ascending"/>
<xsl:sort select="@HPOS" data-type="number" order="ascending"/>
</xsl:apply-templates>
</xsl:copy>
</xsl:template>
<xsl:template match="tns:TextBlock">
<xsl:copy>
<xsl:copy-of select="@*"/>
<xsl:apply-templates select="tns:TextLine">
<xsl:sort select="@VPOS" data-type="number" order="ascending"/>
<xsl:sort select="@HPOS" data-type="number" order="ascending"/>
</xsl:apply-templates>
</xsl:copy>
</xsl:template>
<xsl:template match="tns:TextLine">
<xsl:copy>
<xsl:copy select="."/>
<xsl:apply-templates select="tns:String"/>
<xsl:apply-templates select="tns:SP"/>
</xsl:copy>
</xsl:template>
<xsl:template match="tns:String">
<xsl:copy select=".">
<xsl:copy-of select="@*"/>
<xsl:value-of select="@CONTENT"/>
</xsl:copy>
</xsl:template>
<xsl:template match="tns:SP">
<xsl:copy select=".">
<xsl:copy-of select="@*"/>
<xsl:value-of select="@CONTENT"/>
</xsl:copy>
</xsl:template>
<xsl:strip-space elements="*"/>
</xsl:stylesheet> |
Merci beaucoup pour votre aide et vos conseils
Partager