1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
#!C:\Perl\bin\perl.exe
# Check whether a URL may be crawled according to the site's robots.txt.
use strict;
use warnings;

use WWW::RobotRules;
use LWP::Simple qw(get);

# The URL must be defined BEFORE the check runs: the original script called
# the sub first and assigned the global @X afterwards, so the sub always ran
# with an empty URL. A single URL is a scalar, not an array.
my $url = 'http://www.google.com/robots.txt';
robots($url);

# robots($url)
#   Fetches the robots.txt governing $url, parses it, and prints whether
#   $url is allowed for this user agent. Defaults to the Google robots.txt
#   URL when called with no argument (backward compatible with the original
#   no-argument call). Returns nothing.
sub robots {
    my ($url) = @_;
    $url //= 'http://www.google.com/robots.txt';

    my $rules = WWW::RobotRules->new(
        'Mozilla/5.0 (compatible; +http://www.shunix.com/bot.htm)');

    # get() returns undef on any fetch failure; only parse real content.
    my $robots_txt = get($url);
    $rules->parse($url, $robots_txt) if defined $robots_txt;

    # allowed() reports whether crawling the URL is PERMITTED — not whether
    # a robots.txt file exists. The original messages conflated the two.
    # NOTE: with no parsed rules, allowed() defaults to permitting the URL.
    if ($rules->allowed($url)) {
        print "URL autorisée par robots.txt\n";
    }
    else {
        print "URL interdite par robots.txt\n";
    }
    return;
}
Partager