Archive Ensembl HomeArchive Ensembl Home
StableIdHistoryTree.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::StableIdHistoryTree - object representing a stable ID history tree
00024 
00025 =head1 SYNOPSIS
00026 
00027   my $reg = "Bio::EnsEMBL::Registry";
00028   my $archiveStableIdAdaptor =
00029     $reg->get_adaptor( 'human', 'core', 'ArchiveStableId' );
00030 
00031   my $stable_id = 'ENSG00000068990';
00032   my $history =
00033     $archiveStableIdAdaptor->fetch_history_tree_by_stable_id('ENSG01');
00034 
00035   print "Unique stable IDs in this tree:\n";
00036   print join( ", ", @{ $history->get_unique_stable_ids } ), "\n";
00037 
00038   print "\nReleases in this tree:\n";
00039   print join( ", ", @{ $history->get_release_display_names } ), "\n";
00040 
00041   print "\nCoordinates of nodes in the tree:\n\n";
00042   foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) {
00043     print "  Stable ID: " . $a->stable_id . "." . $a->version . "\n";
00044     print "  Release: "
00045       . $a->release . " ("
00046       . $a->assembly . ", "
00047       . $a->db_name . ")\n";
00048     print "  coords: "
00049       . join( ', ', @{ $history->coords_by_ArchiveStableId($a) } )
00050       . "\n\n";
00051   }
00052 
00053 =head1 DESCRIPTION
00054 
00055 This object represents a stable ID history tree graph.
00056 
00057 The graph is implemented as a collection of nodes (ArchiveStableId
00058 objects) and links (StableIdEvent objects) which have positions
00059 on an (x,y) grid. The x axis is used for releases, the y axis for
00060 stable_ids. The idea is to create a plot similar to this (the numbers
00061 shown on the nodes are the stable ID versions):
00062 
00063   ENSG001   1-------------- 2--
00064                                 \
00065   ENSG003                         1-----1
00066                                 /
00067   ENSG002   1-------2----------
00068 
00069            38      39      40    41    42
00070 
00071 The grid coordinates of the ArchiveStableId objects in this example
00072 would be (note that coordinates are zero-based):
00073 
00074   ENSG001.1               (0, 0)
00075   ENSG001.2               (2, 0)
00076   ENSG003.1 (release 41)  (3, 1) 
00077   ENSG003.1 (release 42)  (4, 1) 
00078   ENSG002.1               (0, 2)
00079   ENSG002.2               (1, 2)
00080 
00081 The tree will only contain those nodes which had a change in the stable
00082 ID version. Therefore, in the above example, in release 39 ENSG001 was
00083 present and had version 1 (but will not be drawn there, to unclutter the
00084 output).
00085 
00086 The grid positions will be calculated by the API and will try to
00087 untangle the tree (i.e. try to avoid overlapping lines).
00088 
00089 =head1 METHODS
00090 
00091   new
00092   add_ArchiveStableIds
00093   add_ArchiveStableIds_for_events
00094   remove_ArchiveStableId
00095   flush_ArchiveStableIds
00096   add_StableIdEvents
00097   remove_StableIdEvent
00098   flush_StableIdEvents
00099   get_all_ArchiveStableIds
00100   get_all_StableIdEvents
00101   get_latest_StableIdEvent
00102   get_release_display_names
00103   get_release_db_names
00104   get_unique_stable_ids
00105   optimise_tree
00106   coords_by_ArchiveStableId
00107   calculate_coords
00108   consolidate_tree
00109   reset_tree
00110   current_dbname
00111   current_release
00112   current_assembly
00113   is_incomplete
00114 
00115 =head1 RELATED MODULES
00116 
00117   Bio::EnsEMBL::ArchiveStableId
00118   Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor
00119   Bio::EnsEMBL::StableIdEvent
00120 
00121 =cut
00122 
00123 package Bio::EnsEMBL::StableIdHistoryTree;
00124 
00125 use strict;
00126 use warnings;
00127 no warnings 'uninitialized';
00128 
00129 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00130 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00131 
00132 
00133 =head2 new
00134 
00135   Arg [CURRENT_DBNAME]   : (optional) name of current db
00136   Arg [CURRENT_RELEASE]  : (optional) current release number
00137   Arg [CURRENT_ASSEMBLY] : (optional) current assembly name
00138   Example     : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
00139   Description : object constructor
00140   Return type : Bio::EnsEMBL::StableIdHistoryTree
00141   Exceptions  : none
00142   Caller      : general
00143   Status      : At Risk
00144               : under development
00145 
00146 =cut
00147 
00148 sub new {
00149   my $caller = shift;
00150   my $class = ref($caller) || $caller;
00151   
00152   my $self = {};
00153   bless $self, $class;
00154 
00155   my ($current_dbname, $current_release, $current_assembly) =
00156     rearrange([qw( CURRENT_DBNAME CURRENT_RELEASE CURRENT_ASSEMBLY )], @_ );
00157 
00158   # initialise
00159   $self->{'current_dbname'} = $current_dbname;
00160   $self->{'current_release'} = $current_release;
00161   $self->{'current_assembly'} = $current_assembly;
00162   
00163   return $self;
00164 }
00165 
00166 
00167 =head2 add_ArchiveStableIds
00168 
00169   Arg[1..n]   : Bio::EnsEMBL::ArchiveStableId's @archive_ids
00170                 The ArchiveStableIds to add to the the history tree
00171   Example     : my $archive_id = $archiveStableIdAdaptor->fetch_by_stable_id(
00172                   'ENSG00024808');
00173                 $history->add_ArchiveStableId($archive_id);
00174   Description : Adds ArchiveStableIds (nodes) to the history tree. No
00175                 calculation of grid coordinates is done at this point, you need
00176                 to initiate this manually with calculate_coords().
00177                 ArchiveStableIds are only added once for each release (to avoid
00178                 duplicates).
00179   Return type : none
00180   Exceptions  : thrown on invalid or missing argument
00181   Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
00182   Status      : At Risk
00183               : under development
00184 
00185 =cut
00186 
00187 sub add_ArchiveStableIds {
00188   my ($self, @archive_ids) = @_;
00189 
00190   throw("You must provide one or more Bio::EnsEMBL::ArchiveStableIds to add.")
00191     unless (@archive_ids);
00192 
00193   foreach my $archive_id (@archive_ids) {
00194     throw("Bio::EnsEMBL::ArchiveStableId object expected.")
00195       unless (ref($archive_id) &&
00196               $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
00197 
00198     $self->{'nodes'}->{$self->_node_id($archive_id)} = $archive_id;
00199   }
00200 }
00201 
00202 
00203 =head2 add_ArchiveStableIds_for_events 
00204 
00205   Example     : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
00206                 $history->add_StableIdEvents($event1, $event2);
00207                 $history->add_ArchiveStableIds_for_events;
00208   Description : Convenience method that adds all ArchiveStableIds for all
00209                 StableIdEvents attached to this object to the tree.
00210   Return type : none
00211   Exceptions  : none
00212   Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
00213   Status      : At Risk
00214               : under development
00215 
00216 =cut
00217 
00218 sub add_ArchiveStableIds_for_events {
00219   my $self = shift;
00220 
00221   foreach my $event (@{ $self->get_all_StableIdEvents }) {
00222     if ($event->old_ArchiveStableId) {
00223       $self->add_ArchiveStableIds($event->old_ArchiveStableId);
00224     }
00225     if ($event->new_ArchiveStableId) {
00226       $self->add_ArchiveStableIds($event->new_ArchiveStableId);
00227     }
00228   }
00229 }
00230 
00231 
00232 =head2 remove_ArchiveStableId
00233 
00234   Arg[1]      : Bio::EnsEMBL::ArchiveStableId $archive_id
00235                 the ArchiveStableId to remove from the tree
00236   Example     : $history->remove_ArchiveStableId($archive_id);
00237   Description : Removes an ArchiveStableId from the tree.
00238   Return type : none
00239   Exceptions  : thrown on missing or invalid argument
00240   Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
00241   Status      : At Risk
00242               : under development
00243 
00244 =cut
00245 
00246 sub remove_ArchiveStableId {
00247   my ($self, $archive_id) = @_;
00248     
00249   throw("Bio::EnsEMBL::ArchiveStableId object expected.")
00250     unless ($archive_id && ref($archive_id) &&
00251             $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
00252 
00253   my %nodes = %{ $self->{'nodes'} };
00254   delete $nodes{$self->_node_id($archive_id)};
00255   $self->{'nodes'} = \%nodes;
00256 }
00257 
00258 
00259 =head2 flush_ArchiveStableIds
00260 
00261   Example     : $history->flush_ArchiveStableIds;
00262   Description : Remove all ArchiveStableIds from the tree.
00263   Return type : none
00264   Exceptions  : none
00265   Caller      : general
00266   Status      : At Risk
00267               : under development
00268 
00269 =cut
00270 
00271 sub flush_ArchiveStableIds {
00272   my $self = shift;
00273   $self->{'nodes'} = undef;
00274 }
00275 
00276 
00277 #
00278 # generate a unique node identifier
00279 # 
00280 sub _node_id {
00281   my ($self, $archive_id) = @_;
00282   return $archive_id->stable_id . ':' . $archive_id->db_name;
00283 }
00284 
00285 
00286 =head2 add_StableIdEvents 
00287 
00288   Arg[1..n]   : Bio::EnsEMBL::StableIdEvent's @events
00289                 The StableIdEvents to add to the the history tree
00290   Example     : $history->add_StableIdEvents($event);
00291   Description : Adds StableIdEvents (links) to the history tree. Note that 
00292                 ArchiveStableIds attached to the StableIdEvent aren't added to
00293                 the tree automatically, you'll need to call
00294                 add_ArchiveStableIds_for_events later.
00295   Return type : none
00296   Exceptions  : thrown on invalid or missing argument
00297   Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
00298   Status      : At Risk
00299               : under development
00300 
00301 =cut
00302 
00303 sub add_StableIdEvents {
00304   my ($self, @events) = @_;
00305 
00306   throw("You must provide one or more Bio::EnsEMBL::StableIdsEvents to add.")
00307     unless (@events);
00308 
00309   foreach my $event (@events) {
00310     throw("Bio::EnsEMBL::StableIdEvent object expected.")
00311       unless ($event->isa('Bio::EnsEMBL::StableIdEvent'));
00312 
00313     $self->{'links'}->{$self->_link_id($event)} = $event;
00314   }
00315 }
00316 
00317 
00318 =head2 remove_StableIdEvent 
00319 
00320   Arg[1]      : Bio::EnsEMBL::StableIdEvent $event
00321                 the StableIdEvent to remove from the tree
00322   Example     : $history->remove_StableIdEvent($event);
00323   Description : Removes a StableIdEvent from the tree.
00324   Return type : none
00325   Exceptions  : thrown on missing or invalid arguments
00326   Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
00327   Status      : At Risk
00328               : under development
00329 
00330 =cut
00331 
00332 sub remove_StableIdEvent {
00333   my ($self, $event) = @_;
00334     
00335   throw("Bio::EnsEMBL::StableIdEvent object expected.") unless
00336     ($event && ref($event) && $event->isa('Bio::EnsEMBL::StableIdEvent'));
00337 
00338   my %links = %{ $self->{'links'} };
00339   delete $links{$self->_link_id($event)};
00340   $self->{'links'} = \%links;
00341 }
00342 
00343 
00344 =head2 flush_StableIdEvents 
00345 
00346   Example     : $history->flush_StableIdEvents; 
00347   Description : Removes all StableIdEvents from the tree.
00348   Return type : none
00349   Exceptions  : none
00350   Caller      : general
00351   Status      : At Risk
00352               : under development
00353 
00354 =cut
00355 
00356 sub flush_StableIdEvents {
00357   my $self = shift;
00358   $self->{'links'} = undef; 
00359 }
00360 
00361 
00362 #
00363 # generate a unique link identifier
00364 # 
00365 sub _link_id {
00366   my ($self, $event) = @_;
00367 
00368   my ($old_id, $old_db_name, $new_id, $new_db_name);
00369   if ($event->old_ArchiveStableId) {
00370     $old_id = $event->old_ArchiveStableId->stable_id;
00371     $old_db_name = $event->old_ArchiveStableId->db_name;
00372   }
00373   if ($event->new_ArchiveStableId) {
00374     $new_id = $event->new_ArchiveStableId->stable_id;
00375     $new_db_name = $event->new_ArchiveStableId->db_name;
00376   }
00377 
00378   return join(':', $old_id, $old_db_name, $new_id, $new_db_name);
00379 }
00380 
00381 
00382 =head2 get_all_ArchiveStableIds 
00383 
00384   Example     : foreach my $arch_id (@{ $history->get_all_ArchiveStableIds }) {
00385                   print $arch_id->stable_id, '.', $arch_id->version, "\n";
00386                 }
00387   Description : Gets all ArchiveStableIds (nodes) in this tree.
00388   Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
00389   Exceptions  : none
00390   Caller      : general
00391   Status      : At Risk
00392               : under development
00393 
00394 =cut
00395 
00396 sub get_all_ArchiveStableIds {
00397   my $self = shift;
00398   return [ values %{ $self->{'nodes'} } ]; 
00399 }
00400 
00401 
00402 =head2 get_all_current_ArchiveStableIds 
00403 
00404   Example     : foreach my $arch_id (@{ $history->get_all_current_ArchiveStableIds }) {
00405                   print $arch_id->stable_id, '.', $arch_id->version, "\n";
00406                 }
00407   Description : Convenience method to get all current ArchiveStableIds in this
00408                 tree.
00409                 
00410                 Note that no lazy loading of "current" status is done at that
00411                 stage; as long as you retrieve your StableIdHistoryTree object
00412                 from ArchiveStableIdAdaptor, you'll get the right answer. In
00413                 other use cases, if you want to make sure you really get all
00414                 current stable IDs, loop over the result of
00415                 get_all_ArchiveStableIds() and call
00416                 ArchiveStableId->current_version() on all of them.
00417   Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
00418   Exceptions  : none
00419   Caller      : general
00420   Status      : At Risk
00421               : under development
00422 
00423 =cut
00424 
00425 sub get_all_current_ArchiveStableIds {
00426   my $self = shift;
00427 
00428   my @current = ();
00429 
00430   foreach my $arch_id (@{ $self->get_all_ArchiveStableIds }) {
00431     push @current, $arch_id if ($arch_id->is_current);
00432   }
00433 
00434   return \@current;
00435 }
00436 
00437 
00438 =head2 get_all_StableIdEvents 
00439 
00440   Example     : foreach my $event (@{ $history->get_all_StableIdsEvents }) {
00441                   print "Old stable ID: ", 
00442                     ($event->get_attribute('old', 'stable_id') or 'none'), "\n";
00443                   print "New stable ID: ", 
00444                     ($event->get_attribute('new', 'stable_id') or 'none'), "\n";
00445                   print "Mapping score: ", $event->score, "\n";
00446                 }
00447   Description : Gets all StableIdsEvents (links) in this tree.
00448   Return type : Arrayref of Bio::EnsEMBL::StableIdEvent objects
00449   Exceptions  : none
00450   Caller      : general
00451   Status      : At Risk
00452               : under development
00453 
00454 =cut
00455 
00456 sub get_all_StableIdEvents {
00457   my $self = shift;
00458   return [ values %{ $self->{'links'} } ]; 
00459 }
00460 
00461 
00462 =head2 get_latest_StableIdEvent
00463 
00464   Arg[1]      : Bio::EnsEMBL::ArchiveStableId $arch_id - the stable ID to get
00465                 the latest Event for
00466   Example     : my $arch_id = Bio::EnsEMBL::ArchiveStableId->new(
00467                   -stable_id => 'ENSG00001'
00468                 );
00469                 my $event = $history->get_latest_Event($arch_id);
00470   Description : Returns the latest StableIdEvent found in the tree where a given
00471                 stable ID is the new stable ID. If more than one is found (e.g.
00472                 in a merge scenario in the latest mapping), preference is given
00473                 to self-events.
00474   Return type : Bio::EnsEMBL::StableIdEvent
00475   Exceptions  : thrown on missing or wrong argument
00476   Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::add_all_current_to_history, general
00477   Status      : At Risk
00478               : under development
00479 
00480 =cut
00481 
00482 sub get_latest_StableIdEvent {
00483   my $self = shift;
00484   my $arch_id = shift;
00485   
00486   unless ($arch_id and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')) {
00487     throw("Need a Bio::EnsEMBL::ArchiveStableId.");
00488   }
00489 
00490   my @all_events = @{ $self->get_all_StableIdEvents };
00491   my @self_events = ();
00492 
00493   while (my $event = shift(@all_events)) {
00494     if ($event->new_ArchiveStableId and
00495         $event->new_ArchiveStableId->stable_id eq $arch_id->stable_id) {
00496       push @self_events, $event;
00497     }
00498   }
00499 
00500   my @sorted = sort { $b->new_ArchiveStableId->release <=>
00501                       $a->new_ArchiveStableId->release } @self_events;
00502   
00503   # give priority to self events
00504   my $latest;
00505   while ($latest = shift @sorted) {
00506     last if (($latest->old_ArchiveStableId and
00507               $latest->old_ArchiveStableId->stable_id eq $arch_id->stable_id)
00508              or !$latest->old_ArchiveStableId);
00509   }
00510 
00511   return $latest;
00512 }
00513 
00514 
00515 =head2 get_release_display_names
00516 
00517   Example     : print "Unique release display_names in this tree:\n"
00518                 foreach my $name (@{ $history->get_release_display_names }) {
00519                   print "  $name\n";
00520                 }
00521   Description : Returns a chronologically sorted list of unique release
00522                 display_names in this tree.
00523 
00524                 This method can be used to determine the number of columns when
00525                 plotting the history tree.
00526   Return type : Arrayref of strings.
00527   Exceptions  : none
00528   Caller      : general
00529   Status      : At Risk
00530               : under development
00531 
00532 =cut
00533 
00534 sub get_release_display_names {
00535   my $self = shift;
00536   
00537   my @display_names = map { $_->[1] } @{ $self->_sort_releases };
00538 
00539   return \@display_names;
00540 }
00541 
00542 
00543 =head2 get_release_db_names
00544 
00545   Example     : print "Unique release db_names in this tree:\n"
00546                 foreach my $name (@{ $history->get_release_db_names }) {
00547                   print "  $name\n";
00548                 }
00549   Description : Returns a chronologically sorted list of unique release
00550                 db_names in this tree.
00551   Return type : Arrayref of strings.
00552   Exceptions  : none
00553   Caller      : general
00554   Status      : At Risk
00555               : under development
00556 
00557 =cut
00558 
00559 sub get_release_db_names {
00560   my $self = shift;
00561   
00562   my @db_names = map { $_->[0] } @{ $self->_sort_releases };
00563 
00564   return \@db_names;
00565 }
00566 
00567 
00568 #
00569 # Create a chronologically sorted list of releases.
00570 #
00571 # Return type : Arrayref of arrayrefs (db_name, release)
00572 #
00573 sub _sort_releases {
00574   my $self = shift;
00575 
00576   unless ($self->{'sorted_tree'}->{'releases'}) {
00577 
00578     my %unique = ();
00579 
00580     foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
00581       $unique{join(':', $archive_id->db_name, $archive_id->release)} = 1;
00582     }
00583 
00584     # sort releases by release number, then db_name; this should get them into
00585     # chronological order
00586     my @releases = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] }
00587       map { [ split(/:/, $_) ] } keys(%unique);
00588 
00589     $self->{'sorted_tree'}->{'releases'} = \@releases;
00590   
00591   }
00592 
00593   return $self->{'sorted_tree'}->{'releases'};
00594 }
00595 
00596 
00597 =head2 get_unique_stable_ids 
00598 
00599   Example     : print "Unique stable IDs in this tree:\n"
00600                 foreach my $id (@{ $history->get_unique_stable_ids }) {
00601                   print "  $id\n";
00602                 }
00603   Description : Returns a list of unique stable IDs in this tree. Version is not
00604                 taken into account here. This method can be used to determine
00605                 the number of rows when plotting the history with each stable ID
00606                 occupying one line.
00607 
00608                 Sort algorithm will depend on what was chosen when the sorted
00609                 tree was generated. This ranges from a simple alphanumeric sort
00610                 to algorithms trying to untangle the history tree. If no
00611                 pre-sorted data is found, an alphanumerically sorted list will
00612                 be returned by default.
00613   Return type : Arrayref of strings.
00614   Exceptions  : none
00615   Caller      : general
00616   Status      : At Risk
00617               : under development
00618 
00619 =cut
00620 
00621 sub get_unique_stable_ids {
00622   my $self = shift;
00623   
00624   unless ($self->{'sorted_tree'}->{'stable_ids'}) {
00625     $self->{'sorted_tree'}->{'stable_ids'} = $self->_sort_stable_ids;
00626   }
00627   
00628   return $self->{'sorted_tree'}->{'stable_ids'};
00629 }
00630 
00631 
00632 #
00633 # Returns a list of stable IDs in this history tree, sorted alphabetically.
00634 # This is the simplest sort function used and doesn't try to untangle the tree.
00635 #
00636 # Return type : Arrayref
00637 #
00638 sub _sort_stable_ids {
00639   my $self = shift;
00640   my %unique = map { $_->stable_id => 1 } @{ $self->get_all_ArchiveStableIds };
00641   return [sort keys %unique];
00642 }
00643 
00644 
00645 =head2 optimise_tree
00646 
00647   Example     : $history->optimise_tree;
00648   Description : This method sorts the history tree so that the number of
00649                 overlapping branches is minimised (thus "untangling" the tree).
00650                 
00651                 It uses a clustering algorithm for this which iteratively moves
00652                 the nodes with the largest vertical distance next to each other
00653                 and looking for a mininum in total branch length. This might not
00654                 produce the overall optimum but usually converges on a local
00655                 optimum very quickly.
00656   Return type : none
00657   Exceptions  : none
00658   Caller      : calculate_coords
00659   Status      : At Risk
00660               : under development
00661 
00662 =cut
00663 
00664 sub optimise_tree {
00665   my $self = shift;
00666 
00667   # get all non-self events
00668   my @links;
00669   foreach my $event (@{ $self->get_all_StableIdEvents }) {
00670     next unless ($event->old_ArchiveStableId and $event->new_ArchiveStableId);
00671     my $old_id = $event->old_ArchiveStableId->stable_id;
00672     my $new_id = $event->new_ArchiveStableId->stable_id;
00673     push @links, [$old_id, $new_id] if ($old_id ne $new_id);
00674   }
00675 
00676   # get initial list of sorted unique stable IDs and put them into a position
00677   # lookup hash
00678   my $i = 0;
00679   my %pos = map { $_ => $i++ } @{ $self->_sort_stable_ids };
00680 
00681   my $opt_length;
00682   my $successive_fails = 0;
00683   my $k = 0;
00684   my %seen;
00685 
00686   # for debug purposes:
00687   # find the number of permutations for the given number of stable IDs
00688   my $fact = $self->_factorial(scalar(keys %pos));
00689 
00690   OPT:
00691   while ($successive_fails < 100) {
00692 
00693     # sort links by vertical distance
00694     #warn "sorting\n";
00695     $self->_sort_links(\@links, \%pos);
00696 
00697     # loop over sorted links
00698     SORTED:
00699     foreach my $link (@links) {
00700       
00701       #warn "  trying ".join('-', @$link)."\n";
00702 
00703       $k++;
00704       
00705       # remember last sort order
00706       my %last = %pos;
00707       
00708       #my $this_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
00709       #warn "    before $this_order\n";
00710 
00711       # try both to move bottom node next to top node's current position and
00712       # top node next to bottom node's position - one of the methods might give
00713       # you better results
00714       DIRECT:
00715       foreach my $direction (qw(up down)) {
00716 
00717         # move the nodes next to each other
00718         $self->_move_nodes($link, \%pos, $direction);
00719 
00720         # next if we've seen this sort order before
00721         my $new_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
00722         #warn "    after ($direction) $new_order\n";
00723         if ($seen{$new_order}) {
00724           #warn "      seen\n";
00725           %pos = %last;
00726           next DIRECT;
00727         }
00728         $seen{$new_order} = 1;
00729 
00730         # calculate total link length for this sort order
00731         my $total_length = $self->_total_link_length(\@links, \%pos);
00732 
00733         if (!$opt_length or $total_length < $opt_length) {
00734           #warn "      better ($total_length/$opt_length)\n";
00735           $opt_length = $total_length;
00736           $successive_fails = 0;
00737           next OPT;
00738         } else {
00739           #warn "      worse ($total_length/$opt_length)\n";
00740           %pos = %last;
00741           $successive_fails++;
00742         }
00743       }
00744       
00745     }
00746 
00747     last OPT;
00748     
00749   }
00750 
00751   #warn "Needed $k tries (of $fact) to find optimal tree.\n";
00752 
00753   my @best = sort { $pos{$a} <=> $pos{$b} } keys %pos;
00754   $self->{'sorted_tree'}->{'stable_ids'} = \@best;
00755 }
00756 
00757 
00758 #
00759 # find the number of permutations for a give array size.
00760 # used for debugging code (compare implemented algorithm to looping over all
00761 # possible permutations).
00762 #
00763 sub _factorial {
00764   my ($self, $n) = @_;
00765   my $s = 1;
00766   $s *= $n-- while $n > 0;
00767   return $s;
00768 }
00769 
00770 
00771 #
00772 # sort links by vertical distance
00773 #
00774 sub _sort_links {
00775   my ($self, $links, $pos) = @_;
00776 
00777   my @lookup;
00778 
00779   foreach my $link (@$links) {
00780     my $dist = $pos->{$link->[0]} - $pos->{$link->[1]};
00781     $dist = -$dist if ($dist < 0);
00782     push @lookup, [$dist, $link];
00783     #warn " $dist ".join(' ', @$link)."\n";
00784   }
00785 
00786   @$links = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @lookup;
00787 }
00788 
00789 
00790 #
00791 # make two nodes adjacent by moving the second node next to the first node
00792 # all other node coordinates are adjusted accordingly
00793 #
00794 sub _move_nodes {
00795   my ($self, $link, $pos, $direction) = @_;
00796 
00797   my $first_pos = $pos->{$link->[0]};
00798   my $second_pos = $pos->{$link->[1]};
00799 
00800   # swap positions if necessary
00801   if ($first_pos > $second_pos) {
00802     my $tmp = $second_pos;
00803     $second_pos = $first_pos;
00804     $first_pos = $tmp;
00805   }
00806   #warn "      $first_pos:$second_pos\n";
00807 
00808   foreach my $p (keys %$pos) {
00809     
00810     my $val = $pos->{$p};
00811     
00812     #warn "      $p $val\n";
00813     if ($direction eq 'up') {
00814       if ($val > $first_pos and $val < $second_pos) {
00815         $val++;
00816       } elsif ($val == $second_pos) {
00817         $val = $first_pos + 1;
00818       }
00819     } else {
00820       if ($val > $first_pos and $val < $second_pos) {
00821         $val--;
00822       } elsif ($val == $first_pos) {
00823         $val = $second_pos - 1;
00824       }
00825     }
00826     
00827     #warn "      $p $val\n";
00828     $pos->{$p} = $val;
00829     #warn "\n";
00830   }
00831 }
00832 
00833 
00834 #
00835 # calculate the total link (vertical distance) length based on this sort order
00836 #
00837 sub _total_link_length {
00838   my ($self, $links, $pos) = @_;
00839 
00840   my $total_length;
00841 
00842   foreach my $link (@$links) {
00843     my $length = $pos->{$link->[0]} - $pos->{$link->[1]};
00844     $length = -$length if ($length < 0);
00845     $total_length += $length;
00846   }
00847 
00848   return $total_length;
00849 }
00850 
00851 
00852 =head2 coords_by_ArchiveStableId 
00853 
00854   Arg[1]      : Bio::EnsEMBL::ArchiveStableId $archive_id
00855                 The ArchiveStableId to get tree grid coordinates for
00856   Example     : my ($x, $y) =
00857                   @{ $history->coords_by_ArchiveStableId($archive_id) };
00858                 print $archive_id->stable_id, " coords: $x, $y\n";
00859   Description : Returns the coordinates of an ArchiveStableId in the history
00860                 tree grid. If the ArchiveStableId isn't found in this tree, an
00861                 empty list is returned.
00862                 
00863                 Coordinates are zero-based (i.e. the top leftmost element in
00864                 the grid has coordinates [0, 0], not [1, 1]). This is to
00865                 facilitate using them to create a matrix as a two-dimensional
00866                 array of arrays.
00867   Return type : Arrayref (x coordinate, y coordinate)
00868   Exceptions  : thrown on wrong argument type
00869   Caller      : general
00870   Status      : At Risk
00871               : under development
00872 
00873 =cut
00874 
00875 sub coords_by_ArchiveStableId {
00876   my ($self, $archive_id) = @_;
00877 
00878   throw("Bio::EnsEMBL::ArchiveStableId object expected.")
00879     unless ($archive_id and ref($archive_id) and
00880       $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
00881   
00882   return $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)}
00883     || [];
00884 }
00885 
00886 
00887 =head2 calculate_coords
00888 
00889   Example     : $history->calculate_coords;
00890   Description : Pre-calculates the grid coordinates of all nodes in the tree.
00891   Return type : none
00892   Exceptions  : none
00893   Caller      : ArchiveStableIdAdaptor::fetch_history_by_stable_id
00894   Status      : At Risk
00895               : under development
00896 
00897 =cut
00898 
00899 sub calculate_coords {
00900   my $self = shift;
00901 
00902   # reset any previous tree cordinate calculations
00903   $self->reset_tree;
00904 
00905   # the "master" information for the sorted tree is stored as the sorted lists
00906   # of releases (x) and stable IDs (y). Sort them now.
00907   my $db_names = $self->get_release_db_names;
00908 
00909   # untangle tree by sorting stable IDs appropriately
00910   $self->optimise_tree;
00911   my $stable_ids = $self->get_unique_stable_ids;
00912   
00913   # for performance reasons, additionally store coordinates in a lookup hash
00914   foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
00915   
00916     # coordinates are positions in the sorted lists
00917     my $x = $self->_index_of($archive_id->db_name, $db_names);
00918     my $y = $self->_index_of($archive_id->stable_id, $stable_ids);
00919   
00920     $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} =
00921       [ $x, $y ];
00922   }
00923 }
00924 
00925 #
00926 # Description : Returns the index of an element in an array
00927 # Example     : my @array = (a, b, c);
00928 #               my $i = _index_of('b', \@array); # will return 1
00929 # Return type : Int (or undef if element is not found in array)
00930 #
00931 sub _index_of {
00932   my ($self, $element, $arrayref) = @_;
00933 
00934   throw("Expecting arrayref argument.") unless (ref($arrayref) eq 'ARRAY');
00935 
00936   my @array = @$arrayref;
00937 
00938   while (my $e = pop(@array)) {
00939     return scalar(@array) if ($e eq $element);
00940   }
00941 
00942   return undef;
00943 }
00944 
00945 
00946 =head2 consolidate_tree
00947 
00948   Example     : $history->consolidate_tree;
00949   Description : Consolidate the history tree. This means removing nodes where
00950                 there wasn't a change and bridging gaps in the history. The end
00951                 result will be a sparse tree which only contains the necessary
00952                 information.
00953   Return type : none
00954   Exceptions  : none
00955   Caller      : ArchiveStableIdAdaptor->fetch_history_tree_by_stable_id
00956   Status      : At Risk
00957               : under development
00958 
00959 =cut
00960 
00961 sub consolidate_tree {
00962   my $self = shift;
00963 
00964   #
00965   # get all self-events and creations/deletions and sort them (by stable ID and
00966   # chronologically)
00967   #
00968   my @event_lookup;
00969   
00970   foreach my $event (@{ $self->get_all_StableIdEvents }) {
00971 
00972     my $old_id = $event->old_ArchiveStableId;
00973     my $new_id = $event->new_ArchiveStableId;
00974 
00975     if (!$old_id or !$new_id or ($old_id->stable_id eq $new_id->stable_id)) {
00976       if ($old_id) {
00977         push @event_lookup, [$old_id->stable_id, $old_id->release, 
00978           $old_id->db_name, $event];
00979       } else {
00980         push @event_lookup, [$new_id->stable_id, $new_id->release - 1,
00981           $new_id->db_name, $event];
00982       }
00983     }
00984   }
00985 
00986   my @self_events = map { $_->[3] }
00987     sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] || $a->[2] cmp $b->[2] }
00988       @event_lookup;
00989 
00990   #
00991   # consolidate tree
00992   #
00993   my $last = shift(@self_events);
00994 
00995   while (my $event = shift(@self_events)) {
00996 
00997     my $lo = $last->old_ArchiveStableId;
00998     my $ln = $last->new_ArchiveStableId;
00999     my $eo = $event->old_ArchiveStableId;
01000     my $en = $event->new_ArchiveStableId;
01001 
01002     if ($lo and $eo and $en and $lo->stable_id eq $eo->stable_id
01003         and $lo->version eq $eo->version) {
01004 
01005       # this removes redundant nodes and connects the remaining nodes:
01006       #
01007       # o--o--o  ->  o-----o
01008       # 1  1  1      1     1
01009 
01010       #warn 'A: '.$last->ident_string.' | '.$event->ident_string."\n";
01011 
01012       $self->remove_StableIdEvent($last);
01013       $self->remove_StableIdEvent($event);
01014 
01015       $event->old_ArchiveStableId($lo);
01016 
01017       $self->add_StableIdEvents($event);
01018 
01019     } elsif ($ln and $eo and $ln->db_name ne $eo->db_name
01020         and $ln->stable_id eq $eo->stable_id and $ln->version eq $eo->version) {
01021         
01022       # try to brigde gaps
01023 
01024       if ($en) {
01025         
01026         # o--o  o--o  ->  o--o-----o
01027         # 1  2  2  2      1  2     2
01028         #
01029         #    o  o--o  ->  o-----o
01030         #    1  1  1      1     1
01031         
01032         #warn 'X: '.$last->ident_string.' | '.$event->ident_string."\n";
01033 
01034         $self->remove_StableIdEvent($event);
01035         $event->old_ArchiveStableId($ln);
01036         $self->add_StableIdEvents($event);
01037 
01038       } elsif ($lo) {
01039         
01040         # there's a deletion event, deal with it differently
01041 
01042         if ($lo->version eq $ln->version) {
01043         
01044           # o--o  o  ->  o-----o
01045           # 1  1  1      1     1
01046           
01047           #warn 'Y: '.$last->ident_string.' | '.$event->ident_string."\n";
01048 
01049           $self->remove_StableIdEvent($last);
01050           $last->new_ArchiveStableId($eo);
01051           $self->add_StableIdEvents($last);
01052 
01053         } else {
01054 
01055           # o--o  o  ->  o--o--o
01056           # 1  2  2      1  2  2
01057           
01058           #warn 'Z: '.$last->ident_string.' | '.$event->ident_string."\n";
01059 
01060           $self->remove_StableIdEvent($event);
01061           $event->old_ArchiveStableId($ln);
01062           $event->new_ArchiveStableId($eo);
01063           $self->add_StableIdEvents($event);
01064 
01065         }
01066 
01067       } else {
01068 
01069         # creation followed by deletion in next mapping
01070         #
01071         # o  o  ->  o--o
01072         # 1  1      1  1
01073 
01074         #warn 'Q: '.$last->ident_string.' | '.$event->ident_string."\n";
01075 
01076         $self->remove_StableIdEvent($last);
01077         $self->remove_StableIdEvent($event);
01078         $event->old_ArchiveStableId($ln);
01079         $event->new_ArchiveStableId($eo);
01080         $self->add_StableIdEvents($event);
01081 
01082       }
01083 
01084     } else {
01085       #warn 'C: '.$last->ident_string.' | '.$event->ident_string."\n";
01086     }
01087   
01088     $last = $event;
01089   }
01090   
01091   # now add ArchiveStableIds of the remaining events to the tree
01092   $self->add_ArchiveStableIds_for_events;
01093 }
01094 
01095 
01096 =head2 reset_tree
01097 
01098   Example     : $history->reset_tree;
01099   Description : Resets all pre-calculated tree grid data. Mostly used internally
01100                 by methods that modify the tree.
01101   Return type : none
01102   Exceptions  : none
01103   Caller      : internal
01104   Status      : At Risk
01105               : under development
01106 
01107 =cut
01108 
01109 sub reset_tree {
01110   my $self = shift;
01111   $self->{'sorted_tree'} = undef;
01112 }
01113 
01114 
01115 =head2 current_dbname
01116 
01117   Arg[1]      : (optional) String $dbname - the dbname to set
01118   Example     : my $dbname = $history->current_dbname;
01119   Description : Getter/setter for current dbname.
01120   Return type : String
01121   Exceptions  : none
01122   Caller      : general
01123   Status      : At Risk
01124               : under development
01125 
01126 =cut
01127 
01128 sub current_dbname {
01129   my $self = shift;
01130   $self->{'current_dbname'} = shift if (@_);
01131   return $self->{'current_dbname'};
01132 }
01133 
01134 
01135 =head2 current_release
01136 
01137   Arg[1]      : (optional) Int $release - the release to set
01138   Example     : my $release = $history->current_release;
01139   Description : Getter/setter for current release.
01140   Return type : Int
01141   Exceptions  : none
01142   Caller      : general
01143   Status      : At Risk
01144               : under development
01145 
01146 =cut
01147 
01148 sub current_release {
01149   my $self = shift;
01150   $self->{'current_release'} = shift if (@_);
01151   return $self->{'current_release'};
01152 }
01153 
01154 
01155 =head2 current_assembly
01156 
01157   Arg[1]      : (optional) String $assembly - the assembly to set
01158   Example     : my $assembly = $history->current_assembly;
01159   Description : Getter/setter for current assembly.
01160   Return type : String
01161   Exceptions  : none
01162   Caller      : general
01163   Status      : At Risk
01164               : under development
01165 
01166 =cut
01167 
01168 sub current_assembly {
01169   my $self = shift;
01170   $self->{'current_assembly'} = shift if (@_);
01171   return $self->{'current_assembly'};
01172 }
01173 
01174 
01175 =head2 is_incomplete
01176 
01177   Arg[1]      : (optional) Boolean $incomplete 
01178   Example     : if ($history->is_incomplete) {
01179                   print "Returned tree is incomplete due to too many mappings
01180                     in the database.\n";
01181                 }
01182   Description : Getter/setter for incomplete flag. This is used by
01183                 ArchiveStableIdAdaptor to indicate that it finished building
01184                 the tree prematurely due to too many mappins in the db and can
01185                 be used by applications to print warning messages.
01186   Return type : Boolean
01187   Exceptions  : none
01188   Caller      : general
01189   Status      : At Risk
01190               : under development
01191 
01192 =cut
01193 
01194 sub is_incomplete {
01195   my $self = shift;
01196   $self->{'incomplete'} = shift if (@_);
01197   return $self->{'incomplete'};
01198 }
01199 
01200 
01201 1;
01202