Archive Ensembl HomeArchive Ensembl Home
SetNeighbourNodes.pm
Go to the documentation of this file.
00001 #
00002 # Ensembl module for Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::SetNeighbourNodes
00003 #
00004 # Cared for by Kathryn Beal <kbeal@ebi.ac.uk>
00005 #
00006 # Copyright Ewan Birney
00007 #
00008 # You may distribute this module under the same terms as perl itself
00009 
00010 # POD documentation - main docs before the code
00011 =head1 NAME
00012 
00013 Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::Gerp 
00014 
00015 =head1 SYNOPSIS
00016 
00017     $set_neighbour_nodes->fetch_input();
00018     $set_neighbour_nodes->run();
00019     $set_neighbour_nodes->write_output(); writes to database
00020 
00021 =head1 DESCRIPTION
00022 
00023 
00024 =head1 AUTHOR - Kathryn Beal
00025 
00026 This modules is part of the Ensembl project http://www.ensembl.org
00027 
00028 Email kbeal@ebi.ac.uk
00029 
00030 =head1 CONTACT
00031 
00032 This modules is part of the EnsEMBL project (http://www.ensembl.org)
00033 
00034 Questions can be posted to the ensembl-dev mailing list:
00035 dev@ensembl.org
00036 
00037 
00038 =head1 APPENDIX
00039 
00040 The rest of the documentation details each of the object methods. 
00041 Internal methods are usually preceded with a _
00042 
00043 =cut
00044 
00045 package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::SetNeighbourNodes;
00046 
00047 use strict;
00048 
00049 #use Bio::EnsEMBL::Utils::Exception qw(throw);
00050 use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
00051 #use Bio::EnsEMBL::Compara::GenomicAlignBlock;
00052 #use Bio::EnsEMBL::Compara::GenomicAlign;
00053 #use Bio::EnsEMBL::Compara::AlignSlice;
00054 #use Bio::SimpleAlign;
00055 #use Bio::AlignIO;
00056 #use Bio::LocatableSeq;
00057 #use Getopt::Long;
00058 
00059 use Bio::EnsEMBL::Hive::Process;
00060 our @ISA = qw(Bio::EnsEMBL::Hive::Process);
00061 
00062 $| = 1;
00063 
00064 my $flanking_region = 1000000;
00065 
00066 =head2 fetch_input
00067 
00068     Title   :   fetch_input
00069     Usage   :   $self->fetch_input
00070     Function:   Fetches input data for gerp from the database
00071     Returns :   none
00072     Args    :   none
00073 
00074 =cut
00075 
00076 sub fetch_input {
00077   my( $self) = @_;
00078 
00079   #create a Compara::DBAdaptor which shares the same DBI handle
00080   #with $self->db (Hive DBAdaptor)
00081   $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc);
00082   $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);
00083 
00084   #read from analysis table
00085   $self->get_params($self->parameters); 
00086 
00087   #read from analysis_job table
00088   $self->get_params($self->input_id);
00089 
00090 }
00091 
00092 sub run {
00093     my( $self) = @_;
00094 
00095     my $root_id = $self->root_id;
00096 
00097     my $start = $self->start;
00098     my $end = $self->end;
00099     my $step = $self->step;
00100     my $mlss_id = $self->mlss_id;
00101     my $alignment_type = $self->alignment_type;
00102     my $set_of_species = $self->set_of_species;
00103 
00104     my $method_link_species_set_adaptor = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor();
00105     my $genomic_align_tree_adaptor = $self->{'comparaDBA'}->get_GenomicAlignTreeAdaptor();
00106     my $genomic_align_block_adaptor = $self->{'comparaDBA'}->get_GenomicAlignBlockAdaptor();
00107     
00108     my $method_link_species_set;
00109      if ($mlss_id) {
00110     $method_link_species_set = $method_link_species_set_adaptor->fetch_by_dbID($mlss_id);
00111     if (!$method_link_species_set) {
00112         die "Cannot find a MLSS for ID $mlss_id\n";
00113     }
00114      } else {
00115     $method_link_species_set = $method_link_species_set_adaptor->fetch_by_method_link_type_registry_aliases(
00116                                                         $alignment_type, [split(":", $set_of_species)]);
00117     if (!$method_link_species_set) {
00118         die "Cannot find a MLSS for $alignment_type and $set_of_species\n";
00119     }
00120      }
00121 #     if ($end and $end < ($start + $step)) {
00122 #   $step = $end - $start;
00123 #     }
00124 #     my $all_genomic_align_trees = fetch_trees($self, $method_link_species_set->dbID, $start, $step);
00125     
00126 
00127 #     while (@$all_genomic_align_trees) {
00128     print "$start: " if $self->debug;
00129     #foreach my $this_genomic_align_tree (@$all_genomic_align_trees) {
00130       my $this_genomic_align_tree = $genomic_align_tree_adaptor->fetch_node_by_node_id($root_id);
00131  
00132         my $all_nodes = $this_genomic_align_tree->get_all_nodes_from_leaves_to_this();
00133         foreach my $this_node (@{$all_nodes}) {
00134         if ($this_node->is_leaf()) {
00135             $genomic_align_tree_adaptor->set_neighbour_nodes_for_leaf($this_node);
00136         } else {
00137             set_neighbour_nodes_for_internal_node($this_node);
00138         }
00139         }
00140         print " ", $this_genomic_align_tree->node_id . "(" . @$all_nodes . ")" if $self->debug;
00141         $genomic_align_tree_adaptor->update_neighbourhood_data($this_genomic_align_tree);
00142         #     foreach my $this_node (@{$this_genomic_align_tree->get_all_nodes_from_leaves_to_this()}) {
00143         #       print "  *** ",
00144         #           $this_node->name,":",
00145         #           $this_node->genomic_align->genome_db->name,":",
00146         #           $this_node->genomic_align->dnafrag->name,":",
00147         #           $this_node->genomic_align->dnafrag_start,":",
00148         #           $this_node->genomic_align->dnafrag_end,":",
00149         #           $this_node->genomic_align->dnafrag_strand,":",
00150         #           " (", ($this_node->left_node_id?$this_node->left_node->root->node_id:"...."),
00151         #           " - ",  ($this_node->right_node_id?$this_node->right_node->root->node_id:"...."),")\n";
00152         #     }
00153         
00154 
00155 #   }
00156 #   print "\n" if $self->debug;
00157     
00158 #   $start += $step;
00159 #   last if (defined($end) and $end == 0);
00160 #   last if ($start >= $end);
00161 #   $all_genomic_align_trees = fetch_trees($self, $method_link_species_set->dbID, $start, $step);
00162 #    }
00163 }
00164 
00165 ##########################################
00166 #
00167 # internal methods
00168 #
00169 ##########################################
00170 sub get_params {
00171     my $self         = shift;
00172     my $param_string = shift;
00173 
00174     return unless($param_string);
00175     
00176     my $params = eval($param_string);
00177     return unless($params);
00178 
00179     if (defined($params->{'program'})) {
00180     $self->program($params->{'program'}); 
00181     }
00182 
00183    
00184     #read from parameters in analysis_job table
00185     if (defined($params->{'root_id'})) {
00186     $self->root_id($params->{'root_id'});
00187     }
00188     if (defined($params->{'start'})) {
00189     $self->start($params->{'start'});
00190     }
00191     if (defined($params->{'end'})) {
00192     $self->end($params->{'end'});
00193     }
00194     if (defined($params->{'step'})) {
00195     $self->step($params->{'step'});
00196     }
00197     if (defined($params->{'method_link_species_set_id'})) {
00198     $self->mlss_id($params->{'method_link_species_set_id'});
00199     }
00200     if (defined($params->{'mlss_id'})) {
00201     $self->mlss_id($params->{'mlss_id'});
00202     }
00203     if (defined($params->{'alignment_type'})) {
00204     $self->alignment_type($params->{'alignment_type'});
00205     }
00206     if (defined($params->{'set_of_species'})) {
00207     $self->set_of_species($params->{'set_of_species'});
00208     }
00209 }
00210 #read start from analysis_job table
00211 sub root_id {
00212     my $self = shift;
00213     $self->{'_root_id'} = shift if(@_);
00214     return $self->{'_root_id'};
00215 }
00216 
00217 #read start from analysis_job table
00218 sub start {
00219     my $self = shift;
00220     $self->{'_start'} = shift if(@_);
00221     return $self->{'_start'};
00222 }
00223 
00224 #read end from analysis_job table
00225 sub end {
00226     my $self = shift;
00227     $self->{'_end'} = shift if(@_);
00228     return $self->{'_end'};
00229 }
00230 
00231 #read step from analysis_job table
00232 sub step {
00233     my $self = shift;
00234     $self->{'_step'} = shift if(@_);
00235     return $self->{'_step'};
00236 }
00237 
00238 #read mlss_id from analysis_job table
00239 sub mlss_id {
00240     my $self = shift;
00241     $self->{'_mlss_id'} = shift if(@_);
00242     return $self->{'_mlss_id'};
00243 }
00244 
00245 #read alignment_type from analysis_job table
00246 sub alignment_type {
00247     my $self = shift;
00248     $self->{'_alignment_type'} = shift if(@_);
00249     return $self->{'_alignment_type'};
00250 }
00251 
00252 #read mlss_id from analysis_job table
00253 sub set_of_species {
00254     my $self = shift;
00255     $self->{'_set_of_species'} = shift if(@_);
00256     return $self->{'_set_of_species'};
00257 }
00258 
00259 sub set_neighbour_nodes_for_internal_node {
00260   my ($this_node) = @_;
00261 
00262   my ($left_node_id, $right_node_id);
00263   foreach my $this_child (@{$this_node->children}) {
00264     my $left_node = $this_child->left_node;
00265     my $right_node = $this_child->right_node;
00266 
00267     if ($left_node and $left_node->parent) {
00268       if (!defined($left_node_id)) {
00269         $left_node_id = $left_node->parent->node_id;
00270       } elsif ($left_node_id != $left_node->parent->node_id) {
00271         $left_node_id = 0;
00272       }
00273     } else {
00274       $left_node_id = 0;
00275     }
00276     if ($right_node and $right_node->parent) {
00277       if (!defined($right_node_id)) {
00278         $right_node_id = $right_node->parent->node_id;
00279       } elsif ($right_node_id != $right_node->parent->node_id) {
00280         $right_node_id = 0;
00281       }
00282     } else {
00283       $right_node_id = 0;
00284     }
00285     $left_node->release_tree if (defined $left_node);
00286     $right_node->release_tree if (defined $right_node);
00287   }
00288   $this_node->left_node_id($left_node_id) if ($left_node_id);
00289   $this_node->right_node_id($right_node_id) if ($right_node_id);
00290 
00291   return $this_node;
00292 }
00293 
00294 sub fetch_trees {
00295   my ($self, $mlss_id, $start, $step) = @_;
00296 
00297   my $sql = "select node_id from genomic_align_tree where node_id between ${mlss_id}0000000000 and ${mlss_id}9999999999 and parent_id = 0 order by node_id limit $start, $step";
00298 
00299   my $trees = [];
00300   my $sth = $self->{'comparaDBA'}->dbc->prepare($sql);
00301   $sth->execute();
00302   while (my $row = $sth->fetchrow_arrayref()) {
00303     my $root_id = $row->[0];
00304     my $this_tree = $self->{'comparaDBA'}->get_GenomicAlignTreeAdaptor->fetch_node_by_node_id($root_id);
00305     push(@$trees, $this_tree);
00306   }
00307   $sth->finish;
00308 
00309   return $trees;
00310 }