#!/usr/bin/perl5
#################################################################################
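# Document selection: for every entry of the <documents> list, extract the
# matching <DOC> section from the files of <rep_documents> and write it to
# <rep_nos_documents> as <num>.html.
# Give the directory arguments with a trailing slash, as in the example.
# Usage: Selection.pl <documents> <rep_documents> <rep_nos_documents>
# Example: selection.pl /tmp/test/dea/doc /tmp/test/dea/sourcecp/ /tmp/test/dea/tri/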
#################################################################################
# ---------------------------------------------------------------------------
# Main program.
#
# Arguments:
#   $ARGV[0]  list file; each line holds a number and a title separated by
#             whitespace (a third field, called "poids" in the original
#             code, is ignored)
#   $ARGV[1]  directory holding the source files (gzip-compressed or not)
#   $ARGV[2]  destination directory, created when it does not exist yet
#
# For every list entry, each source file whose first 10 characters equal the
# lower-cased first 10 characters of the title is uncompressed, copied to the
# destination directory, handed to subextrait() and the copy is removed.
#
# DOC1, DIR1 and DIR2 are deliberately left open here: they are closed by the
# statements at the very end of the script.
# ---------------------------------------------------------------------------
unless (@ARGV == 3) {
    # No backslash in front of "Usage": "\U" is the upper-case escape of
    # double-quoted strings; it swallowed the "U" and upper-cased the rest.
    die "\nUsage : $0 <documents> <rep_documents> <rep_nos_documents>\n";
}

my ($list_path, $src_dir, $dest_dir) = @ARGV;

# File names are appended directly to the directory names below, so make
# sure both directories end with a path separator.
for my $dir ($src_dir, $dest_dir) {
    $dir .= '/' unless $dir =~ m{/\z};
}

open(DOC1, '<', $list_path)
    or die "\n Erreur d'ouverture de $ARGV[0] : Documents ($!)\n";

opendir(DIR1, $src_dir)
    or die "\n Erreur d'ouverture de $ARGV[1] : Répertoire des Documents ($!)\n";

# Every directory entry except the hidden ones ("." and ".." included).
my @files = grep { !/^\./ } readdir DIR1;

# Suffix of the generated files. Package global: subextrait() reads it.
our $ext = ".html";

# opendir() takes a plain directory name (no ">" mode prefix as with open()).
unless (-d $dest_dir) {
    mkdir($dest_dir, 0777)
        or die "\n Erreur de mkdir $ARGV[2] ($!)\n";
}
opendir(DIR2, $dest_dir)
    or die "\n Erreur d'ouverture de $ARGV[2] ($!)\n";

while (my $line = <DOC1>) {
    # split(' ') also discards leading whitespace.
    my ($num, $titre) = split(' ', $line);
    next unless defined $titre;            # blank or incomplete line

    # Source files are matched on the lower-cased first 10 characters of
    # the title; computed once per entry instead of once per file.
    my $prefix = lc substr($titre, 0, 10);

    foreach my $file (@files) {
        my $nfile = substr($file, 0, 10);
        next unless $prefix eq $nfile;

        my $packed   = $src_dir . $file;
        my $unpacked = $src_dir . $nfile;
        my $work     = $dest_dir . $nfile;

        # @files was read once, so an archive uncompressed for an earlier
        # entry is still listed under its old name: only call gzip while
        # the archive is really there.
        # NOTE(review): like the original, this presumes "gzip -d" leaves
        # a file named after the first 10 characters of the archive name.
        if ($file ne $nfile && -e $packed) {
            # List form: no shell, so odd characters in names are harmless.
            system('gzip', '-d', $packed) == 0
                or warn "Erreur de gzip -d $packed\n";
        }

        if (system('cp', $unpacked, $work) != 0) {
            warn "Erreur de copie de $unpacked vers $work\n";
            next;
        }

        # The title is passed in its original case: it is compared with
        # the <DOCNO> tag inside the file.
        subextrait($num, $dest_dir, $work, $titre);

        unlink($work)
            or warn "Erreur de suppression de $work ($!)\n";
    }
}
# subextrait($numfile, $repfile, $file, $titrefile)
#
# Copies one document out of $file into "<$repfile>/<$numfile><$ext>", where
# $ext is the package global set by the main program.
#
#   $numfile    base name of the output file
#   $repfile    output directory, with or without a trailing slash
#   $file       input file to scan
#   $titrefile  document identifier; the document starts at the line whose
#               first word is "<DOCNO>$titrefile</DOCNO>"
#
# Output: a "<DOC>" line, then every input line from the <DOCNO> line up to
# and including the first line whose first word is "</DOC>". Each line is
# trimmed and its runs of spaces are reduced to one space. The output file
# is created (empty) even when the document is not found.
#
# Returns 1 when the document was found, 0 otherwise.
# Dies when a file cannot be opened or the output cannot be written.
sub subextrait {
    my ($numfile, $repfile, $file, $titrefile) = @_;
    our $ext;    # output suffix, owned by the main program

    # Accept the directory with or without its trailing separator.
    my $outdir = $repfile;
    $outdir .= '/' if length($outdir) && $outdir !~ m{/\z};

    # Write straight to the final name: no temporary file, no cp/rm through
    # the shell while the output handle is still open.
    my $target = $outdir . $numfile . (defined $ext ? $ext : '');

    open(my $in, '<', $file)
        or die "\n Erreur d'ouverture de $file : Documents ($!)\n";
    open(my $out, '>', $target)
        or die "\n Erreur d'ouverture de $target : Documents ($!)\n";

    my $wanted = "<DOCNO>$titrefile</DOCNO>";
    my $found  = 0;

    while (my $line = <$in>) {
        $line =~ s/^\s+|\s+$//g;
        my @mots  = split(/ +/, $line);
        my $first = @mots ? $mots[0] : '';    # blank line: nothing to compare

        if ($first eq $wanted) {
            # The opening <DOC> line precedes <DOCNO> in the input and has
            # already been skipped, so it is written back here.
            print {$out} "<DOC>\n";
            $found = 1;
        }
        next unless $found;

        print {$out} "@mots\n";
        last if $first eq "</DOC>";
    }

    close($in);
    close($out)
        or die "\n Erreur d'ecriture de $target ($!)\n";

    return $found;
}
# Release the handles opened by the main program: the document list, the
# source directory and the destination directory.
close(DOC1);
closedir(DIR1);
closedir(DIR2);