#!/usr/bin/perl
use strict;
use warnings;
use HTML::Parser;
use LWP::Simple;
# Base of the PharmGKB object URL; the object id and class are appended below.
my $baseurl = 'http://www.pharmgkb.org/do/serve?objId=';

# Parser state shared by the handlers:
#   0 = ignore text, 1 = inside <title>,
#   2 / 3 / 4 = inside a link to a Gene / Disease / Drug object.
my $flag = 0;

die "usage: $0 role code\n"
    if @ARGV != 2;
my ($role, $code) = @ARGV;
print "$code\n";
print "$role\n";

# Use a lookup table: ('Gene' || 'Disease' || 'Drug') evaluates to just 'Gene',
# so comparing against that expression would reject 'Disease' and 'Drug'.
my %is_valid_role = map { $_ => 1 } qw(Gene Disease Drug);
die "not a valid role\n"
    unless $is_valid_role{$role};

my $url  = $baseurl . $code . '&objCls=' . $role;
my $page = get($url);

# LWP::Simple::get returns undef when the document cannot be fetched; stop
# here instead of handing an undefined value to the parser.
die "could not fetch $url\n"
    unless defined $page;

# Filled by text_rtn with the " / "-separated parts of the latest link text.
my @tab_gene;
my @tab_disease;
my @tab_drug;

# Parser: route start tags, text and end tags to the handlers defined below.
my $parser = HTML::Parser->new(
    start_h => [\&start_rtn, "tag, attr"],
    text_h  => [\&text_rtn,  "text"],
    end_h   => [\&end_rtn,   "tag"],
);
# Start-tag handler for HTML::Parser.
#   $name  - tag name of the element being opened
#   $attrs - hash reference of the element's attributes
# Sets $flag so that the text handler knows what kind of element it is in:
# 1 for <title>, 2/3/4 for an <a> whose href points at a Gene/Disease/Drug
# object page. Other tags leave $flag untouched.
sub start_rtn {
    my ($name, $attrs) = @_;

    # Flag value associated with each object class found in a link target.
    my %flag_for_class = (
        Gene    => 2,
        Disease => 3,
        Drug    => 4,
    );

    $flag = 1 if $name =~ m{^title$};

    return unless $name =~ m{^a$};

    my $target = $attrs->{href};
    return unless defined $target;

    # The href must be exactly an object link; the class is captured in $1.
    if ($target =~ m{^/do/serve\?objId=PA[0-9]+&objCls=(Gene|Disease|Drug)$}) {
        $flag = $flag_for_class{$1};
    }
}
# Text handler for HTML::Parser.
#   $text - the text chunk handed over by the parser
# Prints the text according to $flag (set by start_rtn): the page title when
# $flag is 1, or the names found in a Gene/Disease/Drug link when it is 2/3/4.
# For links, the matching @tab_* array is replaced by the " / "-separated
# parts of the current text.
sub text_rtn {
    my ($text) = @_;
    $text =~ s/\n//g;    # keep each report on a single output line

    if ($flag == 1) {
        print "Le titre : $text \n";
    }
    elsif ($flag == 2) {
        # No push beforehand: the assignment replaces the array anyway.
        @tab_gene = split(/ \/ /, $text);
        print "Gene relatifs : @tab_gene\n";
    }
    elsif ($flag == 3) {
        @tab_disease = split(/ \/ /, $text);
        # Same format as the gene and drug lines (no stray "/" after the list).
        print "Maladies relatives : @tab_disease\n";
    }
    elsif ($flag == 4) {
        @tab_drug = split(/ \/ /, $text);
        print "Médicaments relatifs : @tab_drug\n";
    }
}
# End-tag handler for HTML::Parser.
#   $closing - the end tag as delivered by the "tag" argspec, i.e. the tag
#              name with a leading "/"
# Resets $flag to 0 when a title closes, or when a link closes while $flag
# marks a Gene/Disease/Drug link (2, 3 or 4).
sub end_rtn {
    my ($closing) = @_;

    my $closes_title = $closing =~ m{^/title$};
    my $closes_link  = $closing =~ m{^/a$}
        && grep { $flag == $_ } 2, 3, 4;

    $flag = 0 if $closes_title || $closes_link;
}
# Start parsing: feed the downloaded page to the parser; the handlers print
# their findings as the document is scanned.
$parser->parse($page);
# End of document: tell the parser no more input is coming.
$parser->eof;