#!/usr/bin/perl
use warnings;
use strict;
use XML::Twig;
use File::Temp;
# Running counter used to number the generated files (article1.xml, ...).
my $i = 1;

# Call ParsingDiv for every <div ...> element of the input document.
my %twig_handler = ( "div" => \&ParsingDiv, );

# Direct method call instead of the indirect-object form "new XML::Twig",
# which perl can mis-parse depending on what is in scope.
my $twig = XML::Twig->new( TwigHandlers => \%twig_handler, );
$twig->parsefile("estrep.xml");
# Twig handler for <div> elements.
#
# For each <div type="article"> it writes a standalone TEI file named
# "article<N>.xml" (N is the file-level counter $i) containing:
#   - the document's teiHeader,
#   - a wrapper <div> carrying the attributes of the enclosing <div>,
#   - the article <div> itself,
# then re-indents the file with PrettyPrintXml and increments $i.
#
# Parameters:
#   $twig      - the XML::Twig object driving the parse
#   $BaliseDiv - the <div> element that has just been parsed
#
# Returns false for a <div> that is not an article, true after an article
# has been written. Dies if the teiHeader is missing or the output file
# cannot be written.
sub ParsingDiv {
    my ( $twig, $BaliseDiv ) = @_;

    # Only article divs are exported. The defined() test avoids an
    # "uninitialized value" warning for a <div> without a type attribute.
    my $DivType = $BaliseDiv->{"att"}->{"type"};
    return unless defined $DivType && $DivType eq "article";

    # Attributes of the enclosing <div>, if there is one; they are copied
    # onto the wrapper <div> of the generated file.
    my $DivParent  = $BaliseDiv->parent('div');
    my %parent_att = $DivParent ? %{ $DivParent->{"att"} || {} } : ();

    # Look the header up before creating the file so that a missing header
    # does not leave a truncated file behind.
    my $HeaderBalise = $twig->root->first_child("teiHeader")
        or die("souci: teiHeader introuvable\n");

    # Name of the XML file to write.
    my $Articlefile = "article$i" . ".xml";

    open( my $fh, '>:encoding(UTF-8)', $Articlefile )
        or die("souci $Articlefile $!\n");

    print {$fh} "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" . "\n";
    print {$fh} "<TEI xmlns=\"http://www.tei-c.org/ns/1.0\">";
    $HeaderBalise->print($fh);
    print {$fh} "<text><body>";

    # Wrapper <div>. Attribute values are stored unescaped by the parser,
    # so they must be re-escaped here or the file would not be well-formed.
    # Keys are sorted to make the output deterministic.
    print {$fh} "<div";
    foreach my $name ( sort keys %parent_att ) {
        my $value = $parent_att{$name};
        $value = "" unless defined $value;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/"/&quot;/g;
        print {$fh} " $name=\"$value\"";
    }
    print {$fh} ">";

    $BaliseDiv->print($fh);
    print {$fh} "</div>";
    print {$fh} "</body></text></TEI>";

    # Buffered write errors only surface at close time.
    close($fh) or die("souci $Articlefile $!\n");

    # Re-indent the file that was just written.
    PrettyPrintXml($Articlefile);
    $i++;
    return 1;
}
#================================================
# Re-indent an XML file in place.
# Requires XML::Twig, File::Temp, File::Basename
#
# Parameter:
#   $XMLFile - path of the XML file to rewrite
#
# Returns nothing. Dies if the file cannot be parsed, or if the indented
# copy cannot be written or moved over the original.
#================================================
sub PrettyPrintXml {
    my ($XMLFile) = @_;

    # Create the temp file next to the target: rename() cannot cross
    # filesystems, so a temp file in the system temp directory could
    # never replace the original on some setups.
    require File::Basename;
    my ( $FhTemp, $TempFile ) = File::Temp::tempfile(
        DIR    => File::Basename::dirname($XMLFile),
        SUFFIX => ".xml",
        UNLINK => 1,
    );
    binmode( $FhTemp, ":encoding(UTF-8)" );

    my $Twig = XML::Twig->new( PrettyPrint => "indented", );
    $Twig->parsefile($XMLFile);
    $Twig->print($FhTemp);

    # Buffered write errors only surface at close time.
    close($FhTemp) or die("souci $TempFile $!\n");
    $Twig->purge;

    # Replace the original with the indented copy.
    rename( $TempFile, $XMLFile )
        or die("souci $TempFile -> $XMLFile $!\n");
    return;
}
Partager