#!/usr/bin/perl
use strict;
use warnings;
use Carp qw(confess);
use Getopt::Long;
use List::MoreUtils qw(uniq);
use Bio::SeqIO;
# Remove redundant sequences from a FASTA file.
#
# Usage: <script> --fasta <input.fasta> --out <output.fasta>
#
# Sequences are compared case-insensitively (they are upper-cased on read).
# A sequence is considered redundant when another input sequence occurs
# inside it; only the shortest, non-containing sequences are kept. Each kept
# sequence is written to the --out file as a FASTA record, using the ID of
# the last input record that carried that exact (upper-cased) sequence.

my $usage = "Usage: $0 --fasta <input.fasta> --out <output.fasta>\n";

my ( $fasta_file, $out_file );
GetOptions(
    "fasta=s" => \$fasta_file,
    "out=s"   => \$out_file,
) or die $usage;
die $usage unless defined $fasta_file && defined $out_file;

# Open the input first so that an unreadable input never truncates an
# existing output file.
my $in = Bio::SeqIO->new( -file => $fasta_file, '-format' => 'Fasta' );

# Map each upper-cased sequence to its record ID. Keying on the upper-cased
# form matters: the comparison and the final lookup both use that form.
my %id_of;
while ( my $record = $in->next_seq() ) {
    my $id       = $record->id();
    my $sequence = $record->seq;

    # An empty sequence is a substring of every other sequence and would
    # wipe out the whole result, so it is skipped.
    if ( !defined $sequence || $sequence eq '' ) {
        my $label = defined $id ? $id : '(no id)';
        warn "Skipping record '$label': empty sequence\n";
        next;
    }

    $id_of{ uc $sequence } = $id;
}

# Walk the sequences in sorted order and maintain the list of sequences kept
# so far. index() is used instead of a regex so that characters such as '*'
# or '.' in a sequence are compared literally.
my @kept;
foreach my $candidate ( sort keys %id_of ) {
    my $redundant = 0;

    foreach my $existing (@kept) {
        if ( index( $existing, $candidate ) >= 0 ) {

            # A kept sequence contains the candidate: replace it in place
            # with the shorter one ($existing aliases the array element).
            $existing  = $candidate;
            $redundant = 1;
        }
        elsif ( index( $candidate, $existing ) >= 0 ) {

            # The candidate contains an already kept, shorter sequence.
            $redundant = 1;
        }
    }

    push @kept, $candidate unless $redundant;
}

# In-place replacement can leave the same sequence in several slots.
my @final = uniq @kept;

open( my $out, '>', $out_file )
    or die "Cannot open '$out_file' for writing: $!\n";
foreach my $sequence (@final) {
    print {$out} ">$id_of{$sequence}\n$sequence\n";
}

# Buffered write errors only surface at close time.
close($out) or die "Cannot close '$out_file': $!\n";