1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
| #!/usr/bin/perl
use strict;
use warnings;
use feature qw(:5.10);
use Data::Dumper;
use List::Util qw(max);
my $mafft = $ARGV[0];
sub print_mafft {
my ($table, $max, $col) = @_;
$max //= 1;
my @col_names = $col ? @$col : @{$table->[0]};
say join "\t", ".", @col_names;
my @lines = @col_names;
foreach my $line (@{$table->[1]}{@col_names}) {
my $line_name = shift @lines;
say join "\t", $line_name, map { ($line->{$_} // $table->[1]->{$_}->{$line_name} // "0") / $max } @col_names;
}
}
# Representation en mémoire :
# @mafft = ( [ qw(p3 p5 p2 p4 p1) ],
# { p3 => { p3 => 0 },
# p5 => { p3 => 12, p5 => 0 },
# ... } );
# Parsing de $mafft
open my $MAFFT, "<", $mafft or die "Can't read $mafft: $!";
# Ignorer les 3 premières lignes
<$MAFFT>;<$MAFFT>;<$MAFFT>;
#my @col_names;
my @mafft;
my (@col_names, @col_matrix_line);
my $current_line_name;
my $max = 0;
while (defined(my $line = <$MAFFT>)) {
if (my ($line_name) = $line =~ /\|\s*(.+?)\s*\|/) {
# get a line name
push @{$mafft[0]}, $line_name;
}
elsif (@{$mafft[0]}) {
$line =~ s/^\s*//;
# Initialize the column names vector
@col_names = @{$mafft[0]} if !@col_names;
# For each new matrix line, remove the first column name (distance from one to itself)
if (!@col_matrix_line) {
$current_line_name = shift @col_names;
@col_matrix_line = @col_names;
}
chomp($line);
my @columns = split /\s+/, $line;
# get et remove the col_names which have to be updated with this line
my @col_file_line = splice @col_matrix_line, 0, scalar(@columns);
@{$mafft[1]->{$current_line_name}}{@col_file_line} = @columns;
$max = max($max, @columns);
}
}
print_mafft(\@mafft, $max);
# Pour voir la matrice non normalisée, utiliser print_mafft(\@mafft) |
Partager