#!/usr/bin/perl
use strict;
use warnings;
use HTML::Parser;
use LWP::Simple;
# Variables
my $baseurl = 'http://www.google.com/';
# 1 while the parser is inside a matching result link, 0 otherwise
# (set by start_rtn, read by text_rtn, reset by end_rtn).
my $flag = 0;
die "usage: $0 site gene\n"
    if @ARGV != 2;
my $pharmGKB = $ARGV[0];
my $nom_gene = $ARGV[1];
# URL of the search results page to parse.
# NOTE(review): both arguments are inserted into the query string as-is
# (no URI escaping) -- confirm callers only pass URL-safe values.
my $url = $baseurl.'search?hl=fr&q='.$pharmGKB.'+'.$nom_gene.'&btnG=Rechercher&meta=';
print "$url\n";
my $PAGE = get($url);
# LWP::Simple::get returns undef when the request fails; stop here with a
# clear message rather than handing an undefined document to the parser.
die "could not fetch $url\n"
    unless defined $PAGE;
# Return true when $value is defined and matches $pattern.
# An undefined $value yields false without triggering a warning.
sub check {
    my ($value, $pattern) = @_;
    return defined($value) && $value =~ m/$pattern/;
}
# Parser: register one callback per event. The second element of each
# pair is the HTML::Parser argspec naming what the callback receives.
my %handler_for = (
    start_h => [ \&start_rtn, "tag, attr" ],
    text_h  => [ \&text_rtn,  "text" ],
    end_h   => [ \&end_rtn,   "tag" ],
);
my $parser = HTML::Parser->new(%handler_for);
# Start-tag handler (receives the tag name and a hashref of attributes).
#
# Recognises an <a> element whose href points at a PharmGKB "serve" page,
# whose class is exactly "l", and whose onmousedown is the clk(...) call
# carrying 'res','1' (presumably the first search result -- confirm).
# On a match it raises $flag and prints the href with no trailing newline,
# so the link text printed by the text handler follows on the same line.
sub start_rtn {
    my ($tag, $attr) = @_;

    return unless $tag eq 'a';

    # Dots in the host name are escaped so they match only a literal '.';
    # \A and \z anchor to the real start/end of the attribute value.
    return unless check( $attr->{href},
        qr{\Ahttp://www\.pharmgkb\.org/do/serve\?objId=} );
    return unless check( $attr->{class}, qr{\Al\z} );
    return unless check( $attr->{onmousedown},
        qr{return clk\(this\.href,'','','res','1',''\)} );

    $flag = 1;
    print $attr->{href};
    return;
}
# Text handler: while $flag is 1, print the text chunk on a single line
# (every newline replaced by a space) followed by a newline.
sub text_rtn {
    my ($chunk) = @_;
    return unless $flag == 1;

    # Work on a copy; tr replaces each newline with one space.
    (my $one_line = $chunk) =~ tr/\n/ /;
    print "$one_line\n";
}
# End-tag handler: when a closing "/a" tag arrives while $flag is 1,
# reset $flag to 0 so later text chunks are no longer printed.
sub end_rtn {
    my ($closing_tag) = @_;
    return unless $flag == 1;
    return unless $closing_tag =~ /^\/a$/;
    $flag = 0;
}
# Start parsing: feed the fetched page to the parser.
$parser->parse($PAGE);
# End of document: signal that no more input will follow.
$parser->eof;