#!/usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Tools::Run::Alignment::Clustalw;
use FileHandle;
# Input multi-FASTA file of contigs. The first command-line argument, when
# given, overrides the historical default location.
my $file = @ARGV ? $ARGV[0] : 'P:/Theorie/YANN/Fadoua/mutation_seq/all_contigs.txt';
my $in = Bio::SeqIO->new(-file => $file, -format => 'fasta');

# TEM reference sequence.
# Author's codon notes (codon = residue/position): GAG = E104  CTG = R164  GGT = G238
my $TEM_ref = 'AACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCC';

# SHV reference sequence.
# Author's codon notes: AGC = S130  GAC = D179  GGC = G238  GAG or GAA = E240
my $SHV_ref = 'TCTGTGCCGCCGTCATTACCATGAGCGATAACAGCGCCGCCAATCTGCTGCTGGCCACCGTCGGCGGCCCCGCAGGATTGACTGCCTTTTTGCGCCAGATCGACGACAACGTCACCCGCCTTGACCGCTGGGAAACGGAACTGAATGAGGCGCTTCCCGGCGACGCCCGCGACACCACTACCCCGGCCAGCATGGCCGCGACCCTGCGCAAGCTGCTGACCAGCCAGCGTCTGAGCGCCCGTTCGCAACGGCAGCTGCTGCAGTGGATGGTGGACGATCGGGTCGCCGGACCGTTGATCCGCTCCGTGCTGCCGGCGGGCTGGTTTATCGCCGATAAGACCGGAGCTGGCGAGCGGGGTGCGCGCGGGATTGTCGCCCTGCTTGGCCCGAATAACAAAG';

=begin comment

Lookup table kept from the original author; it is not read by the program.
NOTE(review): it appears to pair a CTX-1 motif and a CTX-2 motif with a
CTX-M subgroup label -- confirm the column meaning with the author.

CTX-1 CTX-2 Pyro
Subgroup CTX-M
AAAAAATCTGACCTGG GGTGACTATGGCA 1A
AAAAAATCTGACCTGG GGTGGCTATGGCA 1B
AAAAAATCTGACCTTG GGTGGCTATGGCA 1C
AAAAAATCTGACCTTG GGTGACTATGGCA 1D
AAAAAATCTGACTTGG GGTGACTATGGCA 1E
AAAAAATCTGACCTGG GGTGACTATGGTA 1F
AAAAAATCTGACCTGG GGTGGCTATGGTA 1G
AAAAAATCTGACCTGG TGTGACTATGGTA 1H
AAGAAGAGCGACCTGG GGAGATTATGGCA 2A
AGAGCAAGCGACCTGG GGAGATTATGGCA 2B
AAGGCGAGCGACCTGG GGAGATTATGGCA 2C
AAGCCTGCCGATCTGG GGCGGCTACGGCA 9A
AAGCCTGCCGATCTGG GGCGACTACGGCA 9B
AAATCCTCGGACCTGA GGTGATTATGGTA 8A
AAATCTTCAGACCTGA GGTGATTATGGTA 8B
AAGCCCTCAGACTTGA GGCGGTTATGGTA 25A
AAGCCCTCAGACTTGA GGCGATTATGGTA 25B
AAGCCCTCAGACTTGG GGCGGTTATGGTA 25C

=end comment

=cut

# CTX reference sequences (two separate regions, keyed 'CTX-1' and 'CTX-2').
my $CTX1_ref = 'ATGTGCAGCACCAGTAAAGTGATGGCCGTGGCCGCGGTGCTGAAGAAAAGTGAAAGCGAACCGAATCTGTTAAATCAGCGAGTTGAGATCAAAAAATCTGACTTGGTTAACTATAATCCGATTGCGGAAAAGCACGTCGATGGGACGATGTCACTGGCTGAGCTTAGCGCGGCCGCGCTACAGTACAGCGATAACGTGGCGATGAATAAGCTGATTTCTCA';
my $CTX2_ref = 'TGGTGACATGGATGAAAGGCAATACCACCGGTGCAGCGAGCATTCAGGCTGGACTGCCTGCTTCCTGGGTTGTGGGGGATAAAACCGGCAGCGGTGACTATGGCACCACCAACGATATCGCGGTGATCTGGCCAAAAGATCGTGCGCCGCTGATTCTGGTCACTTACTTCACCCA';

# Reference sequence for each gene name that can appear as the prefix of a
# contig identifier.
my %sequences_ref = (
    TEM     => $TEM_ref,
    SHV     => $SHV_ref,
    'CTX-1' => $CTX1_ref,
    'CTX-2' => $CTX2_ref,
);
# Main loop: for every contig of the input file, derive the gene name from the
# contig identifier (expected shape "<gene>_<suffix>"), align the contig
# against the matching reference sequence, then print that reference.
while ( my $seq = $in->next_seq() ) {
    my $contig_id = $seq->primary_id;
    my ($gene, $bact) = $contig_id =~ m/([A-Z\d-]+)_(\w+)/i;

    # The identifier does not have the expected shape: report it as-is rather
    # than using undefined values as hash key and in the message.
    if ( !defined $gene ) {
        print "PROBLEME DE REFERENCE POUR $contig_id\n";
        next;
    }

    # No reference sequence is known for this gene: report and skip, so that
    # nothing undefined gets printed below.
    if ( !exists $sequences_ref{$gene} ) {
        print "PROBLEME DE REFERENCE POUR $gene\t$bact\n";
        next;
    }

    # Align the contig against our reference sequence.
    my $seqobj_ref = Bio::Seq->new(
        -display_id => $gene . '_ref',
        -seq        => $sequences_ref{$gene},
    );
    ClustalW2( [ $seq, $seqobj_ref ], $contig_id );

    print $sequences_ref{$gene} . "\n";
}
# Align a set of sequences with ClustalW2 and save the alignment as FASTA.
#
# Arguments:
#   $seq_array_ref - array reference of sequence objects, handed unchanged to
#                    the ClustalW factory's align() method
#   $id            - identifier used as the base name of the output files
#
# Files produced under P:/Theorie/YANN/Fadoua/mutation_seq/ref_ali/ :
#   <id>.msf - alignment file requested from ClustalW through 'outfile'
#   <id>.fsa - the alignment(s) read back from <id>.msf, rewritten as FASTA
#
# Returns nothing.
sub ClustalW2 {
    my ($seq_array_ref, $id) = @_;

    # Plain paths: the '>' write marker is only meaningful to Bio::AlignIO and
    # must be given neither to ClustalW nor to the handle that reads the file.
    my $out_dir  = 'P:/Theorie/YANN/Fadoua/mutation_seq/ref_ali/';
    my $fich_fsa = $out_dir . $id . '.fsa';
    my $fich_msf = $out_dir . $id . '.msf';

    # NOTE(review): the file is parsed as MSF below, so ClustalW must really
    # emit MSF here -- confirm that these parameters achieve that.
    my @params = (
        'gapopen'  => 15,
        'PAIRGAP'  => 0,
        'ktuple'   => 4,
        'type'     => 'dna',
        'outfile'  => $fich_msf,
        'format'   => 'Fasta',
        'outorder' => 'aligned',
    );

    # Build the factory from the parameter list and run the alignment.
    my $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
    $factory->executable("C:/ClustalW2/clustalw2.exe");
    $factory->align($seq_array_ref);

    # Create the FASTA file: read the alignment written by ClustalW and write
    # it back out ('>' = create/overwrite, so re-runs do not pile up).
    my $in_msf  = Bio::AlignIO->new(-file => $fich_msf,       -format => 'msf');
    my $out_fsa = Bio::AlignIO->new(-file => '>' . $fich_fsa, -format => 'fasta');
    while ( my $aln = $in_msf->next_aln() ) {
        $out_fsa->write_aln($aln);
    }

    return;
}
Partager