Archive Ensembl HomeArchive Ensembl Home
CAFETable.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::CAFEDynamics
00024 
00025 =head1 SYNOPSIS
00026 
00027 =head1 DESCRIPTION
00028 
00029 This RunnableDB calculates the dynamics of a ncRNA family (based on the tree obtained and the CAFE software) in terms of gains losses per branch tree. It needs a CAFE-compliant species tree.
00030 
00031 =head1 INHERITANCE TREE
00032 
00033 Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable
00034 
00035 =head1 APPENDIX
00036 
00037 The rest of the documentation details each of the object methods.
00038 Internal methods are usually preceded with an underscore (_)
00039 
00040 =cut
00041 
00042 package Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::CAFETable;
00043 
00044 use strict;
00045 use Data::Dumper;
00046 use Bio::EnsEMBL::Compara::Graph::NewickParser;
00047 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00048 
00049 sub param_defaults {
00050     return {
00051         'clusterset_id'  => 1,
00052     };
00053 }
00054 
00055 =head2 fetch_input
00056 
00057     Title     : fetch_input
00058     Usage     : $self->fetch_input
00059     Function  : Fetches input data from database
00060     Returns   : none
00061     Args      : none
00062 
00063 =cut
00064 
00065 sub fetch_input {
00066     my ($self) = @_;
00067 
00068     unless ( $self->param('cafe_tree_string') ) {
00069         die ('cafe_species_tree can not be found');
00070     }
00071 
00072     unless ( $self->param('cafe_species') ) {
00073         # get the species you want to include and put then in $self->param('cafe_species')
00074     }
00075 
00076     unless ( $self->param('type') ) {
00077         die ('type is mandatory [prot|nc]');
00078     }
00079 
00080     if ($self->param('type') eq 'nc') {
00081         my $ncTree_Adaptor = $self->compara_dba->get_NCTreeAdaptor();
00082         $self->param('adaptor', $ncTree_Adaptor);
00083     } elsif ($self->param('type') eq 'prot') {
00084         my $protTree_Adaptor = $self->compara_dba->get_ProteinTreeAdaptor();
00085         $self->param('adaptor', $protTree_Adaptor);
00086     } else {
00087         die 'type must be [prot|nc]';
00088     }
00089 
00090     return;
00091 }
00092 
00093 sub run {
00094     my ($self) = @_;
00095 
00096     $self->get_cafe_tree_from_string();
00097     my $cafe_table = $self->get_cafe_table_from_db();
00098 }
00099 
00100 sub write_output {
00101     my ($self) = @_;
00102     my $cafe_table_file = $self->param('cafe_table_file');
00103     my $cafe_tree_string = $self->param('cafe_tree_string');
00104     print STDERR "$cafe_table_file\n" if ($self->debug());
00105     print STDERR "$cafe_tree_string\n" if ($self->debug());
00106     $self->dataflow_output_id (
00107                                {
00108                                 'cafe_table_file' => $cafe_table_file,
00109                                 'cafe_tree_string' => $cafe_tree_string,
00110                                }, 1
00111                               );
00112 }
00113 
00114 
00115 ###########################################
00116 ## Internal methods #######################
00117 ###########################################
00118 
00119 sub get_cafe_tree_from_string {
00120     my ($self) = @_;
00121     my $cafe_tree_string = $self->param('cafe_tree_string');
00122     print STDERR "$cafe_tree_string\n" if ($self->debug());
00123     my $cafe_tree = Bio::EnsEMBL::Compara::Graph::NewickParser::parse_newick_into_tree($cafe_tree_string);
00124     $self->param('cafe_tree', $cafe_tree);
00125     return;
00126 }
00127 
00128 sub get_cafe_table_from_db {
00129     my ($self) = @_;
00130     my $species = $self->param('cafe_species');
00131     my $adaptor = $self->param('adaptor');
00132 
00133     my $mlss_id = $self->param('mlss_id');
00134 
00135     my $cafe_table_file = "cafe_${mlss_id}.tbl";
00136 
00137     my $cafe_table_output = $self->param('work_dir') . "/" . $cafe_table_file;
00138     $self->param('cafe_table_file', $cafe_table_file);
00139 
00140     print STDERR "CAFE_TABLE_OUTPUT: $cafe_table_output\n" if ($self->debug());
00141     open my $cafe_fh, ">", $cafe_table_output or die $!;
00142 
00143     print $cafe_fh "FAMILY_DESC\tFAMILY\t", join("\t", @$species), "\n";
00144 
00145     print STDERR Dumper $adaptor if ($self->debug());
00146 #    my $clusterset = $adaptor->fetch_node_by_node_id($self->param('clusterset_id'));
00147 ##    my $clusterset = $self->compara_dba->get_NCTreeAdaptor->fetch_node_by_node_id($self->param('clusterset_id'));
00148 #    my $all_trees = $clusterset->children();
00149     my $all_trees = $adaptor->fetch_all();
00150     print STDERR scalar @$all_trees, " trees to process\n" if ($self->debug());
00151     my $ok_fams = 0;
00152     for my $subtree (@$all_trees) {
00153 #        my $subtree = $tree->children->[0];
00154         my $root_id;
00155         if (defined $subtree) {
00156             $root_id = $subtree->node_id();
00157         } else {
00158             print STDERR "Undefined subtree for " . $subtree->node_id() . "\n";
00159             next;
00160         }
00161         next if ($root_id == 1); ## This is the clusterset! We have to avoid taking the trees with 'type' 'clusterset'. Should be included in the gene tree API (nc_tree / protein_tree) at some point.
00162         next if ($subtree->tree->tree_type() eq 'superproteintree'); ## For now you also get superproteintrees!!!
00163         print STDERR "ROOT_ID: $root_id\n" if ($self->debug());
00164         my $tree = $adaptor->fetch_node_by_node_id($root_id);
00165         my $name = $self->get_name($tree);
00166         print STDERR "MODEL_NAME: $name\n" if ($self->debug());
00167         my $tree_members = $tree->get_all_leaves();
00168         my %species;
00169         for my $member (@$tree_members) {
00170             my $sp;
00171             eval {$sp = $member->genome_db->name};
00172             next if ($@);
00173             $sp =~ s/_/\./;
00174             $species{$sp}++;
00175         }
00176 
00177         my @flds = ($name, $root_id);
00178         for my $sp (@$species) {
00179             push @flds, ($species{$sp} || 0);
00180         }
00181         if ($self->has_member_at_root([keys %species])) {
00182             $ok_fams++;
00183             print $cafe_fh join("\t", @flds), "\n";
00184         }
00185     }
00186     close($cafe_fh);
00187     print STDERR "$ok_fams families in final table\n" if ($self->debug());
00188     return;
00189 }
00190 
00191 sub has_member_at_root {
00192     my ($self, $sps) = @_;
00193     my $sp_tree = $self->param('cafe_tree');
00194     my $tree_leaves = $sp_tree->get_all_leaves();
00195     my @leaves;
00196     for my $sp (@$sps) {
00197         my $leaf = get_leaf($sp, $tree_leaves);
00198         if (defined $leaf) {
00199             push @leaves, $leaf
00200         }
00201     }
00202     if (scalar @leaves == 0) {
00203         return 0;
00204     }
00205     my $lca = $sp_tree->find_first_shared_ancestor_from_leaves([@leaves]);
00206     return !$lca->has_parent();
00207 }
00208 
00209 sub get_leaf {
00210     my ($sp, $leaves) = @_;
00211     for my $leaf (@$leaves) {
00212         if ($leaf->name() eq $sp) {
00213             return $leaf;
00214         }
00215     }
00216     return undef;
00217 }
00218 
00219 sub get_name {
00220     my ($self, $tree) = @_;
00221     my $name;
00222     if ($self->param('type') eq 'nc') {
00223         $name = $tree->tree->get_tagvalue('model_name');
00224     } else {
00225         $name = $tree->node_id();
00226     }
00227     return $name;
00228 }
00229 
00230 1;