#!/usr/bin/perl
use strict;
use warnings;
use feature qw(:5.10);
use WWW::Mechanize;
use Mojo::DOM;
# Scan the text of a tab-separated study file and report every compound that
# is associated with more than one *distinct* StdInChIKey.
#
# Parameters:
#   $study - the whole study file as one string. The first line is the
#            header; it must contain the columns "Factor Value[Compound]"
#            and "Characteristics[StdInChIKey]". Lines may end in CRLF or LF.
#
# Output:
#   When at least one compound has several distinct keys, a report is printed
#   with say() to the currently selected output handle, one compound per
#   line, sorted by compound name. Nothing is printed otherwise.
#
# Returns:
#   Nothing meaningful. An empty input or a header lacking one of the two
#   columns only triggers a warning, so that a caller looping over many
#   studies can carry on with the next one.
#
# The ($) prototype is kept on purpose: it is part of the existing call
# interface (it forces scalar context on the argument).
sub analyse_study($) {
    my ($study) = @_;
    my @cols = ("Factor Value[Compound]", "Characteristics[StdInChIKey]");

    # Split the string directly instead of opening an in-memory filehandle:
    # such an open fails on decoded text holding code points above 0xFF, and
    # a fixed $/ of "\r\n" would swallow an LF-only file as a single line.
    my ($header_line, @rows) = split /\r?\n/, ($study // '');
    if (!defined $header_line) {
        warn "Study file is empty\n";
        return;
    }

    # Map each column name to its position (the last occurrence wins when a
    # name is repeated in the header).
    my %index_of;
    my $position = 0;
    $index_of{$_} = $position++ for split /\t/, $header_line, -1;

    my @missing = grep { !exists $index_of{$_} } @cols;
    if (@missing) {
        warn "Study file has no column(s): @missing\n";
        return;
    }

    my %keys_of;    # compound => distinct keys, in order of first appearance
    my %seen;       # compound => { key => number of occurrences }
    for my $row (@rows) {
        next if $row eq '';
        my ($compound, $key) = (split /\t/, $row, -1)[ @index_of{@cols} ];

        # Rows too short to hold both columns cannot be analysed.
        next unless defined $compound && defined $key;

        # A compound normally appears on many rows; only a *different* key
        # is worth recording.
        next if $seen{$compound}{$key}++;
        push @{ $keys_of{$compound} }, $key;
    }

    my @duplicated = sort grep { @{ $keys_of{$_} } > 1 } keys %keys_of;
    if (@duplicated) {
        say "The following Compound have several StdInChiKeys:\n",
            join "\n", map { " $_: @{ $keys_of{$_} }" } @duplicated;
    }
    return;
}
# Crawl the archive root, and for every link found in its HTML directory
# listing fetch <link>/s_Study.txt and hand its content to analyse_study().
#
# autocheck is switched off so that a failed request is reported through
# success() instead of dying inside get(): with the default (autocheck on)
# the "else" branch below could never run, and a single directory without a
# s_Study.txt would abort the whole crawl.
my $mech = WWW::Mechanize->new(autocheck => 0);
my $ftp_root = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/dixa/NTC/archive/";
$mech->add_header("Accept" => "text/html");
$mech->get($ftp_root);
if ($mech->success()) {
    my $root_dir = Mojo::DOM->new($mech->content);

    # The root already ends with "/"; strip it so joined URLs do not contain
    # an empty path segment ("archive//dir").
    (my $base_url = $ftp_root) =~ s{/+\z}{};

    # Collect the href of every anchor with an explicit callback (collection
    # level method autoloading is not available in current Mojolicious), and
    # ignore anchors that carry no href at all.
    my @dir_urls = grep { defined($_) && length($_) }
        $root_dir->find("a")->map(sub { $_->attr("href") })->each;

    foreach my $dir_url (@dir_urls) {
        say "Looking in $dir_url";

        # NOTE(review): assumes hrefs are directory names relative to the
        # archive root - confirm against the real listing.
        (my $dir_name = $dir_url) =~ s{\A/+|/+\z}{}g;
        my $study_url = "$base_url/$dir_name/s_Study.txt";

        $mech->get($study_url);
        if (!$mech->success()) {
            # Not every link is a study directory; skip rather than analyse
            # an error page.
            warn "Can't fetch $study_url, skipping\n";
            next;
        }
        analyse_study($mech->content);
    }
}
else {
    die "Can't access to $ftp_root\n";
}
Partager