1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
|
# Compute the IDF (inverse document frequency) of every word in a word list.
#
# Inputs:
#   - word list : one word per line (surrounding whitespace is ignored)
#   - corpus    : one sentence per line, tokens separated by whitespace
# Output:
#   - one "word:idf" line per input word, in input order, where
#       idf = log10(N / n)
#     N is the number of lines in the corpus and n is the number of corpus
#     lines that contain the word as a token. idf is 0 when n is 0.
use strict;
use warnings;
use autodie;
use utf8;

my $words_path   = '/home/lenovo/Bureau/MesTravaux/IDF/test';
my $phrases_path = '/home/lenovo/Bureau/MesTravaux/IDF/phrases';
my $result_path  = '/home/lenovo/Bureau/MesTravaux/IDF/result';

# autodie raises an exception if any of these fail, so no explicit "or die".
# All three files are opened up front so a bad path fails before the long
# pass over the corpus starts.
open my $words_fh,   '<:encoding(UTF-8)', $words_path;
open my $phrases_fh, '<:encoding(UTF-8)', $phrases_path;
open my $result_fh,  '>:encoding(UTF-8)', $result_path;

# Load the word list. @words keeps the input order (and duplicates) for the
# output; %nb_phrases_for holds the per-word sentence count.
my @words;
my %nb_phrases_for;
while (my $word = <$words_fh>) {
    chomp $word;
    $word =~ s/^\s+|\s+$//g;
    push @words, $word;
    $nb_phrases_for{$word} = 0;
}
close $words_fh;

# Single pass over the corpus. Re-reading the corpus handle once per word
# without rewinding it would leave it at EOF after the first word, and a
# rescan per word would be needlessly slow on a multi-million-line corpus.
my $nb_total_phrases = 0;
while (my $phrase = <$phrases_fh>) {
    $nb_total_phrases++;

    # split ' ' splits on any run of whitespace and discards the trailing
    # newline (and "\r"), so no empty or newline-tainted tokens are produced.
    my %seen_in_phrase;
    for my $token (split ' ', $phrase) {
        next if !exists $nb_phrases_for{$token};    # not a requested word
        next if $seen_in_phrase{$token}++;          # count a sentence once
        $nb_phrases_for{$token}++;
    }
}
close $phrases_fh;

for my $word (@words) {
    my $idf = idf($nb_total_phrases, $nb_phrases_for{$word});
    print {$result_fh} "$word:$idf\n";
}

# Buffered write errors only surface at close; autodie turns them into a die.
close $result_fh;

# Return log10($nb_total / $nb_containing), or 0 when no sentence contains
# the word (avoids a division by zero).
sub idf {
    my ($nb_total, $nb_containing) = @_;
    return 0 if $nb_containing == 0;
    return log($nb_total / $nb_containing) / log(10);
}
Partager