1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
#!/usr/bin/perl
use strict;
use warnings;
use Bio::DB::GenBank;
use FileHandle;
#----------------------------- Program GB_AccFile_to_FastaFile_boucle.pl:
# Set-up: working directory, output files, GenBank client, input file and the
# two hashes filled while reading the input.

# Directory that holds the input table and receives every generated file.
my $rep = 'P:/Theorie/Driss/GB_consensus';

# Tab-separated log: one "name, accession, description" line per fetched record.
# The mode is passed separately so the path is never parsed for mode characters,
# and a failed open stops the script here instead of at the first print.
my $data_file = FileHandle->new( $rep . '/GB_data2.txt', 'w' )
    or die "Cannot open '$rep/GB_data2.txt' for writing: $!";

# FASTA file collecting every fetched sequence, all names together.
my $all_seq_file = FileHandle->new( $rep . '/all_seq2.txt', 'w' )
    or die "Cannot open '$rep/all_seq2.txt' for writing: $!";

# Plain class-method call; the indirect-object form "new Class" is ambiguous
# to the parser and is disabled by recent feature bundles.
my $gb = Bio::DB::GenBank->new;

# Input table, read line by line further down.
open my $in_file_fh, '<', $rep . '/sequences2/data.txt'
    or die "Cannot open '$rep/sequences2/data.txt' for reading: $!";

# %liste_acc : sanitized name => array ref of accession numbers
# %liste_seq : sanitized name => reference sequence string
my ( %liste_acc, %liste_seq );
=begin comment

Structure of the two hashes
---------------------------
my %liste_acc = (
'39S_ribosomal_protein_L52' => ['NM_178336', 'NM_180982', 'NM_181304', 'NM_181305', 'NM_181306', 'NM_181307'],
'ADP-ribosylation_factor-like_protein_1' => ['NM_001177'],
'fibronectin_isoform_3_preproprotein' => ['NM_002026', 'NM_054034', 'NM_212474', 'NM_212475', 'NM_212476', 'NM_212478', 'NM_212482'],
'EIF2A' => ['NM_032025'],
);
my %liste_seq = (
'39S_ribosomal_protein_L52' => 'agtggcgactacagcagggactg..caataaa',
'ADP-ribosylation_factor-like_protein_1' => 'ctttccaaagattagactcagtatgagagtaggtgaggaacatagtttgtgtaa..tactat',
'fibronectin_isoform_3_preproprotein' => 'gaacgacacattccacaagcgtcatgaag..accac',
);

=end comment

=cut
# Load the tab-separated input table into %liste_acc and %liste_seq.
# Each usable line carries three fields: name, reference sequence, accession.
# Several lines may share a name; their accessions accumulate in %liste_acc,
# while %liste_seq keeps the sequence of the last line seen for that name.
LINE:
while ( my $line = <$in_file_fh> ) {

    # Remove the line terminator (and any other trailing whitespace, such as
    # the CR of a CRLF file). Left in place it stays attached to the last
    # field, i.e. the accession used for the GenBank query and in the outputs.
    $line =~ s/\s+\z//;
    next LINE if $line eq '';

    my ( $name, $target_seq, $acc ) = split /\s*\t\s*/, $line;

    # An incomplete line would store undef values and later produce an output
    # file with an empty name, so report it and move on.
    if ( !defined $acc || $acc eq '' || $name !~ /\S/ ) {
        warn "Line $. skipped: expected name<TAB>sequence<TAB>accession\n";
        next LINE;
    }

    # The name becomes part of a file name later on: drop all leading
    # whitespace, then turn everything that is not a word character or a
    # hyphen (remaining whitespace included) into an underscore.
    $name =~ s/\A\s+//;
    $name =~ s/[^-\w]/_/g;

    push @{ $liste_acc{$name} }, $acc;
    $liste_seq{$name} = $target_seq;
}
close $in_file_fh
    or warn "Error while closing the input file: $!\n";
# For every name: create "<name>.txt" starting with the reference sequence,
# then fetch each accession from GenBank and append it to that file, to the
# global FASTA file and to the tab-separated log.
# Names are processed in sorted order so the shared output files are
# reproducible from one run to the next.
foreach my $name ( sort keys %liste_acc ) {
    my $seq_path = $rep . '/' . $name . '.txt';
    my $seq_file = FileHandle->new( $seq_path, 'w' )
        or die "Cannot open '$seq_path' for writing: $!";

    print {$seq_file} ">${name}_RefSeq\n$liste_seq{$name}\n";

    ACCESSION:
    foreach my $acc ( @{ $liste_acc{$name} } ) {

        # Fetch once, inside the eval: the record obtained here is the one
        # used below, so a failure can never escape the guard and each
        # accession costs a single request.
        my $info;
        my $fetched = eval { $info = $gb->get_Seq_by_acc($acc); 1 };

        # A thrown exception and an undefined record are both reported the
        # same way; the remaining accessions are still processed.
        if ( !$fetched || !defined $info ) {
            print "ERREUR $acc ($name)\n";
            next ACCESSION;
        }

        my $description = $info->desc();
        $description = '' if !defined $description;    # record without description
        my $seq = $info->seq();

        print {$data_file} "$name\t$acc\t$description\n";
        print {$seq_file} ">$acc\n$seq\n";
        print {$all_seq_file} ">${name}_$acc\n$seq\n";
    }

    # Buffered write errors only show up when the handle is closed.
    $seq_file->close
        or warn "Error while closing '$seq_path': $!\n";
}
Partager