Archive Ensembl HomeArchive Ensembl Home
GenomicAlignTree.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009    http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =head1 NAME
00020 
00021 Bio::EnsEMBL::Compara::GenomicAlignTree
00022 
00023 =head1 SYNOPSIS
00024 
00025 =head1 DESCRIPTION
00026 
00027 Specific subclass of NestedSet to add functionality when the nodes of this tree
00028 are GenomicAlign objects and the tree is a representation of a Protein derived
00029 Phylogenetic tree
00030 
00031 =head1 APPENDIX
00032 
00033 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
00034 
00035 =cut
00036 
00037 
00038 package Bio::EnsEMBL::Compara::GenomicAlignTree;
00039 
00040 use strict;
00041 use Bio::EnsEMBL::Compara::BaseRelation;
00042 use Bio::EnsEMBL::Utils::Argument;
00043 use Bio::EnsEMBL::Utils::Exception;
00044 use Bio::SimpleAlign;
00045 use IO::File;
00046 
00047 use Bio::EnsEMBL::Compara::NestedSet;
00048 use Bio::EnsEMBL::Compara::BaseGenomicAlignSet;
00049 
00050 our @ISA = qw(Bio::EnsEMBL::Compara::NestedSet Bio::EnsEMBL::Compara::BaseGenomicAlignSet);
00051 
00052 
00053 =head2 left_node_id
00054 
00055   Arg [1]     : (optional) $left_node_id
00056   Example     : $object->left_node_id($left_node_id);
00057   Example     : $left_node_id = $object->left_node_id();
00058   Description : Getter/setter for the left_node_id attribute
00059   Returntype  : integer
00060   Exceptions  : none
00061   Caller      : general
00062   Status      : Stable
00063 
00064 =cut
00065 
00066 sub left_node_id {
00067   my $self = shift;
00068   if (@_) {
00069     $self->{_left_node_id} = shift;
00070   }
00071   return $self->{_left_node_id};
00072 }
00073 
00074 
00075 =head2 right_node_id
00076 
00077   Arg [1]     : (optional) $right_node_id
00078   Example     : $object->right_node_id($right_node_id);
00079   Example     : $right_node_id = $object->right_node_id();
00080   Description : Getter/setter for the right_node_id attribute
00081   Returntype  : integer
00082   Exceptions  : none
00083   Caller      : general
00084   Status      : Stable
00085 
00086 =cut
00087 
00088 sub right_node_id {
00089   my $self = shift;
00090   if (@_) {
00091     $self->{_right_node_id} = shift;
00092   }
00093   return $self->{_right_node_id};
00094 }
00095 
00096 
00097 =head2 left_node
00098 
00099   Arg [1]     : -none-
00100   Example     : $left_node = $object->left_node();
00101   Description : Get the left_node object from the database.
00102   Returntype  : Bio::EnsEMBL::Compara::GenomicAlignTree object
00103   Exceptions  : none
00104   Caller      : general
00105   Status      : Stable
00106 
00107 =cut
00108 
00109 sub left_node {
00110   my $self = shift;
00111   if ($self->{_left_node_id} and $self->adaptor) {
00112     return $self->adaptor->fetch_node_by_node_id($self->{_left_node_id});
00113   }
00114   return undef;
00115 }
00116 
00117 
00118 =head2 right_node
00119 
00120   Arg [1]     : -none-
00121   Example     : $left_node = $object->right_node();
00122   Description : Get the right_node object from the database.
00123   Returntype  : Bio::EnsEMBL::Compara::GenomicAlignTree object
00124   Exceptions  : none
00125   Caller      : general
00126   Status      : Stable
00127 
00128 =cut
00129 
00130 sub right_node {
00131   my $self = shift;
00132   if ($self->{_right_node_id} and $self->adaptor) {
00133     return $self->adaptor->fetch_node_by_node_id($self->{_right_node_id});
00134   }
00135   return undef;
00136 }
00137 
00138 =head2 ancestral_genomic_align_block_id
00139 
00140   Arg [1]     : (optional) $ancestral_genomic_align_block_id
00141   Example     : $object->ancestral_genomic_align_block_id($ancestral_genomic_align_block_id);
00142   Example     : $ancestral_genomic_align_block_id = $object->ancestral_genomic_align_block_id();
00143   Description : Getter/setter for the ancestral_genomic_align_block_id attribute
00144                 This attribute is intended for the root of the tree only!
00145   Returntype  : int
00146   Exceptions  : none
00147   Caller      : general
00148   Status      : Stable
00149 
00150 =cut
00151 
00152 sub ancestral_genomic_align_block_id {
00153   my $self = shift;
00154   if (@_) {
00155     $self->{_ancestral_genomic_align_block_id} = shift;
00156   }
00157   return $self->{_ancestral_genomic_align_block_id};
00158 }
00159 
00160 
00161 =head2 modern_genomic_align_block_id
00162 
00163   Arg [1]     : (optional) $modern_genomic_align_block_id
00164   Example     : $object->modern_genomic_align_block_id($modern_genomic_align_block_id);
00165   Example     : $modern_genomic_align_block_id = $object->modern_genomic_align_block_id();
00166   Description : Getter/setter for the modern_genomic_align_block_id attribute
00167   Returntype  : 
00168   Exceptions  : none
00169   Caller      : general
00170   Status      : Stable
00171 
00172 =cut
00173 
00174 sub modern_genomic_align_block_id {
00175   my $self = shift;
00176   if (@_) {
00177     $self->{_modern_genomic_align_block_id} = shift;
00178   }
00179   return $self->{_modern_genomic_align_block_id};
00180 }
00181 
00182 
00183 =head2 genomic_align_group
00184 
00185   Arg [1]     : (optional) $genomic_align_group
00186   Example     : $object->genomic_align_group($genomic_align_group);
00187   Example     : $genomic_align_group = $object->genomic_align_group();
00188   Description : Getter/setter for the genomic_align_group attribute
00189   Returntype  : Bio::EnsEMBL::Compara::GenomicAlignGroup object
00190   Exceptions  : none
00191   Caller      : general
00192   Status      : Stable
00193 
00194 =cut
00195 
00196 sub genomic_align_group {
00197   my $self = shift;
00198   if (@_) {
00199     $self->{_genomic_align_group} = shift;
00200   }
00201   return $self->{_genomic_align_group};
00202 }
00203 
00204 
00205 =head2 get_all_genomic_aligns_for_node
00206 
00207   Arg [1]     : -none-
00208   Example     : $genomic_aligns = $object->get_all_genomic_aligns_for_node
00209   Description : Getter for all the GenomicAligns contained in the
00210                 genomic_align_group object on a node. This method is a short
00211                 cut for $object->genomic_align_group->get_all_GenomicAligns()
00212   Returntype  : listref of Bio::EnsEMBL::Compara::GenomicAlign objects
00213   Exceptions  : none
00214   Caller      : general
00215   Status      : Stable
00216 
00217 =cut
00218 
00219 sub get_all_genomic_aligns_for_node {
00220   my $self = shift(@_);
00221   return [] if (!$self->genomic_align_group);
00222   return $self->genomic_align_group->get_all_GenomicAligns;
00223 }
00224 
00225 =head2 genomic_align_array (DEPRECATED)
00226 
00227   Arg [1]     : -none-
00228   Example     : $genomic_aligns = $object->genomic_align_array
00229   Description : Alias for get_all_genomic_aligns_for_node. TO BE DEPRECATED
00230   Returntype  : listref of Bio::EnsEMBL::Compara::GenomicAlign objects
00231   Exceptions  : none
00232   Caller      : general
00233   Status      : Stable
00234 
00235 =cut
00236 
00237 sub genomic_align_array {
00238     my $self = shift(@_);
00239 
00240     deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignTree->get_all_genomic_aligns_for_node() method instead");
00241     return($self->get_all_genomic_aligns_for_node);
00242 
00243 }
00244 
00245 =head2 get_all_GenomicAligns (DEPRECATED)
00246 
00247   Arg [1]     : -none-
00248   Example     : $genomic_aligns = $object->get_all_GenomicAligns
00249   Description : Alias for get_all_genomic_aligns_for_node. TO BE DEPRECATED
00250   Returntype  : listref of Bio::EnsEMBL::Compara::GenomicAlign objects
00251   Exceptions  : none
00252   Caller      : general
00253   Status      : Stable
00254 
00255 =cut
00256 
00257 sub get_all_GenomicAligns {
00258     my $self = shift(@_);
00259 
00260     deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignTree->get_all_genomic_aligns_for_node() method instead");
00261     return($self->get_all_genomic_aligns_for_node);
00262 
00263 }
00264 
00265 =head2 reference_genomic_align
00266 
00267   Arg [1]     : (optional) $reference_genomic_align
00268   Example     : $object->reference_genomic_align($reference_genomic_align);
00269   Example     : $reference_genomic_align = $object->reference_genomic_align();
00270   Description : Getter/setter for the reference_genomic_align attribute
00271   Returntype  : Bio::EnsEMBL::Compara::GenomicAlign object
00272   Exceptions  : none
00273   Caller      : general
00274   Status      : Stable
00275 
00276 =cut
00277 
00278 sub reference_genomic_align {
00279   my $self = shift;
00280 
00281   if (@_) {
00282     $self->{reference_genomic_align} = shift;
00283   }
00284 
00285   return $self->{reference_genomic_align};
00286 }
00287 
00288 =head2 reference_genomic_align_id
00289 
00290   Arg [1]    : integer $reference_genomic_align_id
00291   Example    : $genomic_align_block->reference_genomic_align_id(4321);
00292   Description: get/set for attribute reference_genomic_align_id. A value of 0 will set the
00293                reference_genomic_align_id attribute to undef. When looking for genomic
00294                alignments in a given slice or dnafrag, the reference_genomic_align
00295                corresponds to the Bio::EnsEMBL::Compara::GenomicAlign included in the
00296                starting slice or dnafrag. The reference_genomic_align_id is the dbID
00297                corresponding to the reference_genomic_align. All remaining
00298                Bio::EnsEMBL::Compara::GenomicAlign objects included in the
00299                Bio::EnsEMBL::Compara::GenomicAlignBlock are the
00300                non_reference_genomic_aligns.
00301                Synchronises reference_genomic_align and reference_genomic_align_id
00302                attributes.
00303   Returntype : integer
00304   Exceptions : throw if $reference_genomic_align_id id not a postive number
00305   Caller     : $genomic_align_block->reference_genomic_align_id(int)
00306   Status     : Stable
00307 
00308 =cut
00309 
00310 sub reference_genomic_align_id {
00311   my ($self, $reference_genomic_align_id) = @_;
00312  
00313   if (defined($reference_genomic_align_id)) {
00314     if ($reference_genomic_align_id !~ /^\d+$/) {
00315       throw "[$reference_genomic_align_id] should be a positive number.";
00316     }
00317     $self->{'reference_genomic_align_id'} = ($reference_genomic_align_id or undef);
00318 
00319     ## Synchronises reference_genomic_align and reference_genomic_align_id
00320     if (defined($self->{'reference_genomic_align'}) and
00321         defined($self->{'reference_genomic_align'}->dbID) and
00322         ($self->{'reference_genomic_align'}->dbID != ($self->{'reference_genomic_align_id'} or 0))) {
00323         $self->{'reference_genomic_align'} = undef; ## Attribute will be set on request
00324     }
00325 
00326   ## Try to get data from other sources...
00327   } elsif (!defined($self->{'reference_genomic_align_id'})) {
00328 
00329     ## ...from the reference_genomic_align attribute
00330     if (defined($self->{'reference_genomic_align'}) and
00331         defined($self->{'reference_genomic_align'}->dbID)) {
00332       $self->{'reference_genomic_align_id'} = $self->{'reference_genomic_align'}->dbID;
00333     }
00334   }
00335   
00336   return $self->{'reference_genomic_align_id'};
00337 }
00338 
00339 
00340 =head2 reference_genomic_align_node
00341 
00342   Arg [1]     : (optional) $reference_genomic_align_node
00343   Example     : $object->reference_genomic_align_node($reference_genomic_align_node);
00344   Example     : $reference_genomic_align_node = $object->reference_genomic_align_node();
00345   Description : Getter/setter for the reference_genomic_align_node attribute
00346   Returntype  : Bio::EnsEMBL::Compara::GenomicAlignTree object
00347   Exceptions  : none
00348   Caller      : general
00349   Status      : Stable
00350 
00351 =cut
00352 
00353 sub reference_genomic_align_node {
00354   my $self = shift;
00355 
00356   if (@_) {
00357     $self->{reference_genomic_align_node} = shift;
00358   }
00359 
00360   return $self->{reference_genomic_align_node};
00361 }
00362 
00363 
00364 =head2 aligned_sequence
00365 
00366   Arg [1]     : -none-
00367   Example     : $aligned_sequence = $object->aligned_sequence();
00368   Description : Get the aligned sequence for this node. When the node
00369                 contains one single sequence, it returns its aligned sequence.
00370                 For composite segments, it returns the combined aligned seq.
00371   Returntype  : string
00372   Exceptions  : none
00373   Caller      : general
00374   Status      : Stable
00375 
00376 =cut
00377 
00378 sub aligned_sequence {
00379   my $self = shift;
00380   return $self->genomic_align_group->aligned_sequence(@_);
00381 }
00382 
00383 
00384 =head2 group_id
00385 
00386   Arg [1]    : integer $group_id
00387   Example    : my $group_id = $genomic_align_tree->group_id;
00388   Example    : $genomic_align_tree->group_id(1234);
00389   Description: get/set for attribute group_id of the underlying
00390                GenomicAlignBlock objects
00391   Returntype : integer
00392   Exceptions : A GenomicAlignTree is made of two GenomicAlignBlock
00393                object. The method fail when gettign the value if the
00394                two group_ids do not match
00395   Caller     : general
00396 
00397 =cut
00398 
00399 sub group_id {
00400     my ($self, $group_id) = @_;
00401     
00402     if (defined($group_id)) {
00403         $self->{'group_id'} = $group_id;
00404         # Set the group_id on the genomic_align_blocks...
00405         my %genomic_align_blocks;
00406         #foreach my $this_genomic_align_node (@{$self->get_all_sorted_genomic_align_nodes()}) {
00407         foreach my $this_genomic_align_node (@{$self->get_all_nodes()}) {
00408         next if (!defined $this_genomic_align_node->genomic_align_group);
00409         foreach my $genomic_align (@{$this_genomic_align_node->genomic_align_group->get_all_GenomicAligns}) {
00410         my $this_genomic_align_block = $genomic_align->genomic_align_block;
00411         if ($this_genomic_align_block and !defined($genomic_align_blocks{$this_genomic_align_block})) {
00412             $this_genomic_align_block->group_id($group_id);
00413             $genomic_align_blocks{$this_genomic_align_block} = 1;
00414         }
00415         }
00416     }
00417     } elsif (!defined($self->{'group_id'}) and defined($self->{adaptor})) {
00418         # Try to get the ID from other sources...
00419         my %group_ids;
00420         my $genomic_align_block_adaptor = $self->adaptor->dba->get_GenomicAlignBlockAdaptor;
00421         foreach my $this_genomic_align_node (@{$self->get_all_nodes()}) {
00422         next if (!defined $this_genomic_align_node->genomic_align_group);
00423         foreach my $genomic_align (@{$this_genomic_align_node->genomic_align_group->get_all_GenomicAligns}) {
00424         my $this_genomic_align_block_id = $genomic_align->genomic_align_block_id;
00425         my $this_genomic_align_block = $genomic_align_block_adaptor->fetch_by_dbID($this_genomic_align_block_id);
00426         if ($this_genomic_align_block->group_id) {
00427             $group_ids{$this_genomic_align_block->group_id} = 1;
00428         } else {
00429             $group_ids{"undef"} = 1;
00430         }
00431         }
00432         }
00433         if (keys %group_ids == 1) {
00434         if (!defined($group_ids{"undef"})) {
00435         $self->{'group_id'} = (keys %group_ids)[0];
00436         }
00437         } else {
00438         warning("Different group_ids found for this GenomicAlignTree\n");
00439         }
00440     }
00441     return $self->{'group_id'};
00442 }
00443 
00444 
00445 =head2 name
00446 
00447   Arg [1]     : (optional) string $name
00448   Example     : $object->name($name);
00449   Example     : $name = $object->name();
00450   Description : Getter/setter for the name attribute.
00451   Returntype  : string
00452   Exceptions  : none
00453   Caller      : general
00454   Status      : At risk
00455 
00456 =cut
00457 
00458 sub name {
00459   my $self = shift;
00460 
00461   if (@_) {
00462     $self->{_name} = shift;
00463   } elsif (!$self->{_name}) {
00464     my $genomic_align_group = $self->genomic_align_group;
00465     if (defined($self->SUPER::name()) and $self->SUPER::name() ne "") {
00466       ## Uses the name defined before blessing this object as a
00467       ## Bio::EnsEMBL::Compara::GenomicAlignTree in the Ortheus pipeline
00468       $self->{_name} = $self->SUPER::name();
00469     } elsif ($self->is_leaf) {
00470         my $gdb_name;
00471       if($genomic_align_group->genome_db->name =~ /(.)[^ ]+_(.{3})/) {
00472         $gdb_name = "${1}${2}";
00473       }
00474       else {
00475         $gdb_name = $genomic_align_group->genome_db->name();
00476         $gdb_name =~ tr/_//;
00477       }
00478       $gdb_name = ucfirst($gdb_name);
00479       $self->{_name} = $gdb_name.'_'.$genomic_align_group->dnafrag->name."_".
00480           $genomic_align_group->dnafrag_start."_".$genomic_align_group->dnafrag_end."[".
00481           (($genomic_align_group->dnafrag_strand eq "-1")?"-":"+")."]";
00482     } else {
00483       $self->{_name} = join("-", map {
00484         my $name = $_->genomic_align_group->genome_db->name;
00485         if($name =~ /(.)[^ ]+_(.{3})/) {
00486             $name = "$1$2";
00487         }
00488         else {
00489             $name =~ tr/_//;    
00490         } 
00491     $name = ucfirst($name);
00492         $name;
00493       } @{$self->get_all_leaves})."[".scalar(@{$self->get_all_leaves})."]";
00494     }
00495   }
00496 
00497   return $self->{_name};
00498 }
00499 
00500 
00501 =head2 get_all_sorted_genomic_align_nodes
00502 
00503   Arg [1]     : (optional) Bio::EnsEMBL::Compara::GenomicAlignTree $reference_genomic_align_node
00504   Example     : $object->get_all_sorted_genomic_align_nodes($ref_genomic_align_node);
00505   Example     : $nodes = $object->get_all_sorted_genomic_align_nodes();
00506   Description : If ref_genomic_align_node is set, sorts the tree based on the
00507                 reference_genomic_align_node
00508                 If ref_genomic_align_node is not set, sorts the tree based on
00509                 the species name
00510   Returntype  : Bio::EnsEMBL::Compara::GenomicAlignTree
00511   Exceptions  : none
00512   Caller      : general
00513   Status      : At risk
00514 
00515 =cut
00516 
00517 sub get_all_sorted_genomic_align_nodes {
00518   my ($self, $reference_genomic_align_node) = @_;
00519   my $sorted_genomic_align_nodes = [];
00520 
00521   if (!$reference_genomic_align_node and $self->reference_genomic_align_node) {
00522     $reference_genomic_align_node = $self->reference_genomic_align_node;
00523   }
00524 
00525   my $children = $self->children;
00526   if (@$children >= 1) {
00527     $children = [sort _sort_children @$children];
00528     push(@$sorted_genomic_align_nodes, @{$children->[0]->get_all_sorted_genomic_align_nodes(
00529             $reference_genomic_align_node)});
00530     push(@$sorted_genomic_align_nodes, $self);
00531     for (my $i = 1; $i < @$children; $i++) {
00532       push(@$sorted_genomic_align_nodes, @{$children->[$i]->get_all_sorted_genomic_align_nodes(
00533               $reference_genomic_align_node)});
00534     }
00535   } elsif (@$children == 0) {
00536     push(@$sorted_genomic_align_nodes, $self);
00537   }
00538 
00539   return $sorted_genomic_align_nodes;
00540 }
00541 
00542 
00543 =head2 restrict_between_alignment_positions
00544 
00545   Arg[1]     : [optional] int $start, refers to the start of the alignment
00546   Arg[2]     : [optional] int $end, refers to the start of the alignment
00547   Arg[3]     : [optional] boolean $skip_empty_GenomicAligns
00548   Example    : none
00549   Description: restrict this GenomicAlignBlock. It returns a new object unless no
00550                restriction is needed. In that case, it returns the original unchanged
00551                object.
00552                This method uses coordinates relative to the alignment itself.
00553                For instance if you have an alignment like:
00554                             1    1    2    2    3
00555                    1   5    0    5    0    5    0
00556                    AAC--CTTGTGGTA-CTACTT-----ACTTT
00557                    AACCCCTT-TGGTATCTACTTACCTAACTTT
00558                and you restrict it between 5 and 25, you will get back a
00559                object containing the following alignment:
00560                             1    1
00561                    1   5    0    5
00562                    CTTGTGGTA-CTACTT----
00563                    CTT-TGGTATCTACTTACCT
00564 
00565                See restrict_between_reference_positions() elsewhere in this document
00566                for an alternative method using absolute genomic coordinates.
00567 
00568                NB: This method works only for GenomicAlignBlock which have been
00569                fetched from the DB as it is adjusting the dnafrag coordinates
00570                and the cigar_line only and not the actual sequences stored in the
00571                object if any. If you want to restrict an object with no coordinates
00572                a simple substr() will do!
00573 
00574   Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object
00575   Exceptions : none
00576   Caller     : general
00577   Status     : At risk
00578 
00579 =cut
00580 
00581 sub restrict_between_alignment_positions {
00582   my ($self, $start, $end, $skip_empty_GenomicAligns, $reference_genomic_align) = @_;
00583   my $genomic_align_tree;
00584   $genomic_align_tree = $self->copy();
00585   $genomic_align_tree->adaptor($self->adaptor);
00586   foreach my $this_node (@{$genomic_align_tree->get_all_nodes}) {
00587     $this_node->adaptor($self->adaptor);
00588   }
00589 
00590   foreach my $this_node (@{$genomic_align_tree->get_all_nodes}) {
00591     my $genomic_align_group = $this_node->genomic_align_group;
00592     next if (!$genomic_align_group);
00593     my $new_genomic_aligns = [];
00594     my $length = $this_node->length;
00595     foreach my $this_genomic_align (@{$genomic_align_group->get_all_GenomicAligns}) {
00596       my $restricted_genomic_align = $this_genomic_align->restrict($start, $end, $length);
00597 
00598       if ($genomic_align_tree->reference_genomic_align eq $this_genomic_align) {
00599         ## Update the reference_genomic_align
00600         $genomic_align_tree->reference_genomic_align($restricted_genomic_align);
00601         $genomic_align_tree->reference_genomic_align_node($this_node);
00602       }
00603       if (!$skip_empty_GenomicAligns or
00604           $restricted_genomic_align->dnafrag_start <= $restricted_genomic_align->dnafrag_end
00605           ) {
00606         ## Always skip composite segments outside of the range of restriction
00607         ## The cigar_line will contain only X's
00608         next if ($restricted_genomic_align->cigar_line =~ /^\d*X$/);
00609         $restricted_genomic_align->genomic_align_block($genomic_align_tree);
00610         push(@$new_genomic_aligns, $restricted_genomic_align);
00611       }
00612     }
00613     if (@$new_genomic_aligns) {
00614       $genomic_align_group->{genomic_align_array} = undef;
00615       foreach my $this_genomic_align (@$new_genomic_aligns) {
00616         $genomic_align_group->add_GenomicAlign($this_genomic_align);
00617       }
00618     } else {
00619       $this_node->disavow_parent();
00620       my $reference_genomic_align = $genomic_align_tree->reference_genomic_align;
00621       if ($reference_genomic_align) {
00622       my $reference_genomic_align_node = $genomic_align_tree->reference_genomic_align_node;
00623       $genomic_align_tree = $genomic_align_tree->minimize_tree();
00624       ## Make sure links are not broken after tree minimization
00625       $genomic_align_tree->reference_genomic_align($reference_genomic_align);
00626       $genomic_align_tree->reference_genomic_align->genomic_align_block($genomic_align_tree);
00627       $genomic_align_tree->reference_genomic_align_node($reference_genomic_align_node);
00628       }
00629     }
00630   }
00631 
00632   return $genomic_align_tree;
00633 }
00634 
00635 
00636 =head2 restrict_between_reference_positions
00637 
00638   Arg[1]     : [optional] int $start, refers to the reference_dnafrag
00639   Arg[2]     : [optional] int $end, refers to the reference_dnafrag
00640   Arg[3]     : [optional] Bio::EnsEMBL::Compara::GenomicAlign $reference_GenomicAlign
00641   Arg[4]     : [optional] boolean $skip_empty_GenomicAligns [ALWAYS FALSE]
00642   Example    : none
00643   Description: restrict this GenomicAlignBlock. It returns a new object unless no
00644                restriction is needed. In that case, it returns the original unchanged
00645                object
00646                It might be the case that the restricted region coincide with a gap
00647                in one or several GenomicAligns. By default these GenomicAligns are
00648                returned with a dnafrag_end equals to its dnafrag_start + 1. For instance,
00649                a GenomicAlign with dnafrag_start = 12345 and dnafrag_end = 12344
00650                correspond to a block which goes on this region from before 12345 to
00651                after 12344, ie just between 12344 and 12345. You can choose to remove
00652                these empty GenomicAligns by setting $skip_empty_GenomicAligns to any
00653                true value.
00654   Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object in scalar context. In
00655                list context, returns the previous object and the start and end
00656                positions of the restriction in alignment coordinates (from 1 to
00657                alignment_length)
00658   Exceptions : return undef if reference positions lie outside of the alignment
00659   Caller     : general
00660   Status     : At risk
00661 
00662 =cut
00663 
00664 =comment 
00665 
00666 sub restrict_between_reference_positions {
00667   my ($self, $start, $end, $reference_genomic_align, $skip_empty_GenomicAligns) = @_;
00668   my $genomic_align_tree;
00669 
00670   $reference_genomic_align ||= $self->reference_genomic_align;
00671   throw("A reference Bio::EnsEMBL::Compara::GenomicAlignTree must be given")
00672       if (!$reference_genomic_align);
00673 
00674    my @restricted_genomic_align_tree_params = $self->SUPER::restrict_between_reference_positions($start, $end, $reference_genomic_align, $skip_empty_GenomicAligns);
00675   my $restricted_genomic_align_tree = $restricted_genomic_align_tree_params[0];
00676 
00677   #return $self if (!$restricted_genomic_align_tree or $restricted_genomic_align_tree eq $self);
00678 
00679   return wantarray ? @restricted_genomic_align_tree_params : $restricted_genomic_align_tree;
00680 }
00681 
00682 =cut
00683 
00684 =head2 copy
00685 
00686   Arg         : none
00687   Example     : my $new_tree = $this_tree->copy()
00688   Description : Create a copy of this Bio::EnsEMBL::Compara::GenomicAlignTree
00689                 object
00690   Returntype  : Bio::EnsEMBL::Compara::GenomicAlignTree
00691   Exceptions  : none
00692   Caller      : general
00693   Status      : At risk
00694 
00695 =cut
00696 
00697 sub copy {
00698   my $self = shift(@_);
00699   my $new_copy;
00700 
00701   $new_copy = $self->SUPER::copy();
00702 
00703   $new_copy->genomic_align_group($self->genomic_align_group->copy) if ($self->genomic_align_group);
00704   
00705   if ($self->reference_genomic_align_node) {
00706       my $ref_ga = $self->reference_genomic_align;
00707       #Need to find the reference_genomic_align in the genomic_align_group
00708       foreach my $leaf (@{$new_copy->get_all_leaves}) {
00709       foreach my $gag ($leaf->genomic_align_group) {
00710           foreach my $ga (@{$gag->genomic_align_array}) {
00711           if ($ref_ga->dnafrag_id == $ga->dnafrag_id &&
00712               $ref_ga->dnafrag_start == $ga->dnafrag_start &&
00713               $ref_ga->dnafrag_end == $ga->dnafrag_end &&
00714               $ref_ga->dnafrag_strand == $ga->dnafrag_strand) {
00715               $new_copy->reference_genomic_align($ga);
00716               last;
00717           }
00718           }
00719       }
00720       }
00721   }
00722   
00723 
00724   $new_copy->reference_genomic_align_node($self->reference_genomic_align_node->copy) if ($self->reference_genomic_align_node);
00725 
00726   #These are not deep copies
00727   #$new_copy->reference_genomic_align($self->reference_genomic_align) if ($self->reference_genomic_align);
00728   #$new_copy->reference_genomic_align_node($self->reference_genomic_align_node) if ($self->reference_genomic_align_node);
00729 
00730   #There are lots of bits missing from this copy
00731   #Still to add?
00732   #parent_link
00733   #obj_id_to_link
00734 
00735   $new_copy->{_original_strand} = $self->{_original_strand} if (defined $self->{_original_strand});
00736   $new_copy->{_parent_id} = $self->{_parent_id} if (defined $self->{_parent_id});
00737   $new_copy->{_root_id} = $self->{_root_id} if (defined $self->{_root_id});
00738   $new_copy->{_left_node_id} = $self->{_left_node_id} if (defined $self->{_left_node_id});
00739   $new_copy->{_right_node_id} = $self->{_right_node_id} if (defined $self->{_right_node_id});
00740   $new_copy->{_node_id} = $self->{_node_id} if (defined $self->{_node_id});
00741   $new_copy->{_reference_slice} = $self->{_reference_slice} if (defined $self->{_reference_slice});
00742   $new_copy->{_reference_slice_start} = $self->{_reference_slice_start} if (defined $self->{_reference_slice_start});
00743   $new_copy->{_reference_slice_end} = $self->{_reference_slice_end} if (defined $self->{_reference_slice_end});
00744   $new_copy->{_reference_slice_strand} = $self->{_reference_slice_strand} if (defined $self->{_reference_slice_strand});
00745 
00746   return $new_copy;
00747 }
00748 
00749 =head2 print
00750 
00751   Arg         : none
00752   Example     : print()
00753   Description : Print the fields in a Bio::EnsEMBL::Compara::GenomicAlignTree 
00754   Returntype  : none
00755   Exceptions  : none
00756   Caller      : general
00757   Status      : At risk
00758 
00759 =cut
00760 
00761 sub print {
00762   my $self = shift(@_);
00763   my $level = shift;
00764   my $reference_genomic_align = shift;
00765   if (!$level) {
00766     print STDERR $self->newick_format(), "\n";
00767     $reference_genomic_align = ($self->reference_genomic_align or "");
00768   }
00769   $level++;
00770   my $mark = "- ";
00771   if (grep {$_ eq $reference_genomic_align} @{$self->get_all_genomic_aligns_for_node}) {
00772     $mark = "* ";
00773   }
00774   print STDERR " " x $level, $mark,
00775       "[", $self->node_id, "/", ($self->get_original_strand?"+":"-"), "] ",
00776       $self->genomic_align_group->genome_db->name,":",
00777       $self->genomic_align_group->dnafrag->name,":",
00778       $self->genomic_align_group->dnafrag_start,":",
00779       $self->genomic_align_group->dnafrag_end,":",
00780       $self->genomic_align_group->dnafrag_strand,":",
00781       " (", ($self->left_node_id?$self->left_node->node_id."/".$self->left_node->root->node_id:"...."),
00782       " - ",  ($self->right_node_id?$self->right_node->node_id."/".$self->right_node->root->node_id:"...."),")\n";
00783   foreach my $this_genomic_align (@{$self->get_all_genomic_aligns_for_node}) {
00784     if ($this_genomic_align eq $reference_genomic_align) {
00785       print " " x 8, "* ", $this_genomic_align->aligned_sequence("+FAKE_SEQ"), "\n";
00786     } else {
00787       print " " x 10, $this_genomic_align->aligned_sequence("+FAKE_SEQ"), "\n";
00788     }
00789   }
00790   foreach my $node (sort _sort_children @{$self->children}) {
00791     $node->print($level, $reference_genomic_align);
00792   }
00793   $level--;
00794 }
00795 
00796 
00797 =head2 get_all_nodes_from_leaves_to_this
00798 
00799   Arg[1]      : Bio::EnsEMBL::Compara::GenomicAlignTree $all_nodes
00800   Example     : my $all_nodes = get_all_nodes_from_leaves_to_this()
00801   Description : 
00802   Returntype  : Bio::EnsEMBL::Compara::GenomicAlignTree object
00803   Exceptions  : none
00804   Caller      : general
00805   Status      : At risk
00806 
00807 =cut
00808 
00809 sub get_all_nodes_from_leaves_to_this {
00810   my $self = shift(@_);
00811   my $all_nodes = (shift or []);
00812   foreach my $node (sort _sort_children @{$self->children}) {
00813     $all_nodes = $node->get_all_nodes_from_leaves_to_this($all_nodes);
00814   }
00815   push(@$all_nodes, $self);
00816   return $all_nodes;
00817 }
00818 
00819 
00820 =head2 get_all_leaves
00821 
00822  Title   : get_all_leaves
00823  Usage   : my @leaves = @{$tree->get_all_leaves};
00824  Function: searching from the given starting node, searches and creates list
00825            of all leaves in this subtree and returns by reference.
00826            This method overwrites the parent method because it sorts
00827            the leaves according to their node_id. Here, we use this method
00828            to get all leaves in another sorting function. Not only it doesn't
00829            make much sense to sort something that will be sorted again, but
00830            it can also produce some Perl errors as sort methods uses $a and
00831            $b which are package global variables.
00832  Example :
00833  Returns : reference to list of NestedSet objects (all leaves)
00834  Args    : none
00835  Status  : At risk
00836 
00837 =cut
00838 
00839 sub get_all_leaves {
00840   my $self = shift;
00841 
00842   my $leaves = {};
00843   $self->_recursive_get_all_leaves($leaves);
00844   my @leaf_list = values(%{$leaves});
00845   return \@leaf_list;
00846 }
00847 
00848 
00849 =head2 _sort_children
00850 
00851   Arg         : none
00852   Example     : sort _sort_children @$children
00853   Description : sort function for sorting the nodes of a Bio::EnsEMBL::Compara::GenomicAlignTree object
00854   Returntype  : int (-1,0,1)
00855   Exceptions  : none
00856   Caller      : general
00857   Status      : At risk
00858 
00859 =cut
00860 
00861 sub _sort_children {
00862   my $reference_genomic_align_node;
00863 
00864   if (defined ($a->root) && defined($b->root) && $a->root eq $b->root and $a->root->reference_genomic_align_node) {
00865     $reference_genomic_align_node = $a->root->reference_genomic_align_node;
00866   }
00867 
00868   ## Reference GenomicAlign based sorting
00869   if ($reference_genomic_align_node) {
00870     if (grep {$_ eq $reference_genomic_align_node} @{$a->get_all_leaves}) {
00871       return -1;
00872     } elsif (grep {$_ eq $reference_genomic_align_node} @{$b->get_all_leaves}) {
00873       return 1;
00874     }
00875   }
00876 
00877   ## Species name based sorting
00878   my $species_a = $a->_name_for_sorting;
00879   my $species_b = $b->_name_for_sorting;
00880 
00881   return $species_a cmp $species_b;
00882 }
00883 
00884 =head2 _name_for_sorting
00885 
00886   Arg         : none
00887   Example     : my $species_a = $a->_name_for_sorting;
00888   Description : if the node is a leaf, create a name based on the species
00889                 name, dnafrag name, group_id and the start position. If the 
00890                 node is an internal node, create a name based on the species 
00891                 name, dnafrag name and the start position
00892   Returntype  : string 
00893   Exceptions  : none
00894   Caller      : _sort_children
00895   Status      : At risk
00896 
00897 =cut
00898 
00899 sub _name_for_sorting {
00900   my ($self) = @_;
00901   my $name;
00902 
00903   if ($self->is_leaf) {
00904     $name = sprintf("%s.%s.%s.%020d",
00905         $self->genomic_align_group->genome_db->name,
00906         $self->genomic_align_group->dnafrag->name,
00907         ($self->genomic_align_group->dbID or
00908           $self->genomic_align_group->{original_dbID} or 0),
00909         $self->genomic_align_group->dnafrag_start);
00910   } else {
00911     $name = join(" - ", sort map {sprintf("%s.%s.%020d",
00912         $_->genomic_align_group->genome_db->name,
00913         $_->genomic_align_group->dnafrag->name,
00914         $_->genomic_align_group->dnafrag_start)} @{$self->get_all_leaves});
00915   }
00916 
00917   return $name;
00918 }
00919 
00920 =head2 reverse_complement
00921 
00922   Args       : none
00923   Example    : none
00924   Description: reverse complement the tree,
00925                modifying dnafrag_strand and cigar_line of each GenomicAlign in consequence
00926   Returntype : none
00927   Exceptions : none
00928   Caller     : general
00929   Status     : At risk
00930 
00931 =cut
00932 
00933 sub reverse_complement {
00934     my ($self) = @_;
00935     
00936     if (defined($self->{_original_strand})) {
00937     $self->{_original_strand} = 1 - $self->{_original_strand};
00938     } else {
00939     $self->{_original_strand} = 0;
00940     }
00941 
00942     foreach my $this_node (@{$self->get_all_nodes}) {
00943     my $genomic_align_group = $this_node->genomic_align_group;
00944     next if (!$genomic_align_group);
00945     my $gas = $genomic_align_group->get_all_GenomicAligns;
00946     foreach my $ga (@{$gas}) {
00947         $ga->reverse_complement;
00948     }
00949     }
00950 }
00951 
00952 sub length {
00953   my ($self, $length) = @_;
00954  
00955   if (defined($length)) {
00956       $self->{'length'} = $length;
00957   } elsif (!defined($self->{'length'})) {
00958       # Try to get the ID from other sources...
00959       if (defined($self->{'adaptor'}) and defined($self->dbID)) {
00960       # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object
00961       $self->adaptor->retrieve_all_direct_attributes($self);
00962       } elsif (@{$self->get_all_genomic_aligns_for_node} and $self->get_all_genomic_aligns_for_node->[0]->aligned_sequence("+FAKE_SEQ")) {
00963       $self->{'length'} = CORE::length($self->get_all_genomic_aligns_for_node->[0]->aligned_sequence("+FAKE_SEQ"));
00964       } else {
00965       foreach my $this_node (@{$self->get_all_nodes}) {
00966           my $genomic_align_group = $this_node->genomic_align_group;
00967           next if (!$genomic_align_group);
00968           $self->{'length'} = CORE::length($genomic_align_group->get_all_GenomicAligns->[0]->aligned_sequence("+FAKE_SEQ"));
00969           last;
00970       }
00971       }
00972   }
00973   return $self->{'length'};
00974 }
00975 
00976 =head2 alignment_strings
00977 
00978   Arg [1]    : none
00979   Example    : $genomic_align_tree->alignment_strings
00980   Description: Returns the alignment string of all the sequences in the
00981                alignment
00982   Returntype : array reference containing several strings
00983   Exceptions : none
00984   Caller     : general
00985   Status     : Stable
00986 
00987 =cut
00988 
00989 sub alignment_strings {
00990   my ($self) = @_;
00991   my $alignment_strings = [];
00992 
00993   foreach my $this_node (@{$self->get_all_nodes}) {
00994     my $genomic_align_group = $this_node->genomic_align_group;
00995     next if (!$genomic_align_group);
00996     foreach my $genomic_align (@{$genomic_align_group->get_all_GenomicAligns}) {
00997     push(@$alignment_strings, $genomic_align->aligned_sequence);
00998     }
00999   }
01000 
01001   return $alignment_strings;
01002 }
01003 
01004 =head2 method_link_species_set
01005 
01006   Arg [1]    : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set
01007   Example    : $method_link_species_set = $genomic_align_tree->method_link_species_set;
01008   Description: Getter for attribute method_link_species_set. Takes this from the first Bio::EnsEMBL::Compara::GenomicAlign
01009                object
01010   Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
01011   Exceptions : thrown if $method_link_species_set is not a
01012                Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
01013   Caller     : general
01014   Status     : Stable
01015 
01016 =cut
01017 
01018 sub method_link_species_set {
01019   my ($self) = @_;
01020 
01021   my $method_link_species_set = $self->get_all_leaves->[0]->genomic_align_group->genomic_align_array->[0]->method_link_species_set;
01022 
01023   throw("$method_link_species_set is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
01024         unless ($method_link_species_set->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));
01025 
01026   return $method_link_species_set;
01027 }
01028 
01029 =head2 method_link_species_set_id
01030 
01031   Arg [1]    : integer $method_link_species_set_id
01032   Example    : $method_link_species_set_id = $genomic_align_tree->method_link_species_set_id;
01033   Description: Getter for the attribute method_link_species_set_id. Takes this from the first 
01034                Bio::EnsEMBL::Compara::GenomicAlign object
01035   Returntype : integer
01036   Caller     : object::methodname
01037   Status     : Stable
01038 
01039 =cut
01040 
01041 sub method_link_species_set_id {
01042   my ($self) = @_;
01043 
01044   my $method_link_species_set_id = $self->get_all_leaves->[0]->genomic_align_group->genomic_align_array->[0]->method_link_species_set->dbID;
01045 
01046   return $method_link_species_set_id;
01047 }
01048 
01049 #sub DESTROY {
01050 #    my ($self) = @_;
01051 #    $self->release_tree unless ($self->{_parent_link});
01052 #    }
01053 
01054 1;