Archive Ensembl Home
FindSingleGenesOnTree.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::FindSingleGenesOnTree
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $db           = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
00017 my $find_single_gene = Bio::EnsEMBL::Compara::RunnableDB::FindSingleGenesOnTree->new
00018   (
00019    -db         => $db,
00020    -input_id   => $input_id,
00021    -analysis   => $analysis
00022   );
00023 $find_single_gene->fetch_input(); #reads from DB
00024 $find_single_gene->run();
00025 $find_single_gene->output();
00026 $find_single_gene->write_output(); #writes to DB
00027 
00028 =cut
00029 
00030 
00031 =head1 DESCRIPTION
00032 
00033 This analysis takes a protein tree id and looks for genes that are the only member of their species in that tree.
00034 Such genes are possible partial genes.
00035 
00036 =cut
00037 
00038 
00039 =head1 CONTACT
00040 
00041   Contact Thomas Maurel on module implementation/design detail: maurel@ebi.ac.uk
00042   Contact Javier Herrero on Split/partial genes in general: jherrero@ebi.ac.uk
00043 
00044 =cut
00045 
00046 =head1 APPENDIX
00047 
00048 The rest of the documentation details each of the object methods. 
00049 Internal methods are usually preceded with a _
00050 
00051 =cut
00052 
00053 
00054 package Bio::EnsEMBL::Compara::RunnableDB::FindSingleGenesOnTree;
00055 
00056 use strict;
00057 
00058 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00059 use List::Util qw[min max];
00060 
# fetch_input
#
# Reads the 'protein_tree_id' parameter, fetches the matching node through the
# ProteinTreeAdaptor of the Compara DBAdaptor and stores it in the
# 'protein_tree' parameter so that run() can pick it up.
#
# Dies when 'protein_tree_id' is missing (or false) and when the adaptor
# returns nothing for that id.
sub fetch_input {
  my ($self) = @_;

  my $protein_tree_id = $self->param('protein_tree_id');
  die "'protein_tree_id' is an obligatory parameter" unless $protein_tree_id;

  # if fetch_node_by_node_id is insufficient, try fetch_tree_at_node_id
  my $fetched_tree = $self->compara_dba->get_ProteinTreeAdaptor()->fetch_node_by_node_id($protein_tree_id);
  die "Could not fetch protein_tree by id=$protein_tree_id" unless $fetched_tree;

  $self->param('protein_tree', $fetched_tree);

  # NOTE(review): presumably releases the DB connection while run() works
  # purely in memory -- confirm against the DBConnection documentation.
  $self->dbc->disconnect_when_inactive(1);
}
00071 
# run
#
# Finds every leaf of the protein tree (parameter 'protein_tree') whose species
# name (genome_db->name) occurs exactly once among all leaves of that tree.
# According to the module description, such genes are possible partial genes.
#
# For each of those leaves one hashref is built with the keys
#   gene_stable_id, protein_tree_stable_id, species_name, kingdom
# and the list (in leaf order) is stored as an arrayref in the 'output_ids'
# parameter, where write_output() reads it.
#
# The optional 'kingdom' parameter defaults to '(none)' when absent or false.
sub run {
  my $self = shift @_;
  my $protein_tree = $self->param('protein_tree');

  # '||' rather than 'or': 'or' binds looser than '=', so the default would
  # never be assigned and 'kingdom' would be passed on as undef.
  my $kingdom = $self->param('kingdom') || '(none)';

  # get all leaves, i.e. all members of the tree
  my @aligned_members = @{ $protein_tree->get_all_leaves };

  # Single pass: remember each member's species name and count how many
  # members every species has in this tree.
  my @species_names;
  my %member_count_for;
  foreach my $aligned_member (@aligned_members) {
    my $species_name = $aligned_member->genome_db->name;
    push @species_names, $species_name;
    $member_count_for{$species_name}++;
  }

  # Keep only members that are alone for their species, preserving leaf order.
  my @output_ids = ();
  foreach my $idx (0 .. $#aligned_members) {
    my $species_name = $species_names[$idx];
    next unless $member_count_for{$species_name} == 1;

    my $aligned_member = $aligned_members[$idx];
    push @output_ids, {
      'gene_stable_id'         => $aligned_member->gene_member->stable_id,
      'protein_tree_stable_id' => $protein_tree->stable_id,
      'species_name'           => $species_name,
      'kingdom'                => $kingdom,
    };
  }

  $self->param('output_ids', \@output_ids);
}
# write_output
#
# Hands the arrayref stored in the 'output_ids' parameter (filled in by run())
# to dataflow_output_id with 3 as second argument.
sub write_output {
  my ($self) = @_;

  my $rows_to_flow = $self->param('output_ids');

  # NOTE(review): presumably keeps the DB connection open again for the
  # dataflow call, undoing the setting made in fetch_input -- confirm.
  $self->dbc->disconnect_when_inactive(0);

  # NOTE(review): 3 is presumably the dataflow branch number -- confirm
  # against the pipeline configuration.
  $self->dataflow_output_id($rows_to_flow, 3);
}
00130 
00131 
00132 
00133 1;
00134