Archive Ensembl HomeArchive Ensembl Home
NestedSetAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009    http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =head1 NAME
00020 
00021 Bio::EnsEMBL::Compara::DBSQL::NestedSetAdaptor
00022 
00023 =head1 DESCRIPTION
00024 
00025 Base adaptor for objects inheriting from NestedSet
00026 
00027 =head1 INHERITANCE TREE
00028 
00029 =head1 AUTHORSHIP
00030 
00031 Ensembl Team. Individual contributions can be found in the CVS log.
00032 
00033 =head1 MAINTAINER
00034 
00035 $Author: mm14 $
00036 
00037 =head VERSION
00038 
00039 $Revision: 1.49 $
00040 
00041 =head1 APPENDIX
00042 
00043 The rest of the documentation details each of the object methods.
00044 Internal methods are usually preceded with an underscore (_)
00045 
00046 =cut
00047 
00048 package Bio::EnsEMBL::Compara::DBSQL::NestedSetAdaptor;
00049 
00050 use strict;
00051 
00052 use Bio::EnsEMBL::DBSQL::DBConnection;
00053 use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate);
00054 use Bio::EnsEMBL::Utils::SqlHelper;
00055 
00056 use Bio::EnsEMBL::Compara::NestedSet;
00057 
00058 use base ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
00059 
00060 
00061 ###########################
00062 # FETCH methods
00063 ###########################
00064 
00065 =head2 fetch_all
00066 
00067   Arg[1]     : -none-
00068   Example    : $all_trees = $proteintree_adaptor->fetch_all();
00069 
00070   Description: Fetches from the database all the nested sets.
00071   Returntype : arrayref of Bio::EnsEMBL::Compara::NestedSet
00072   Exceptions :
00073   Caller     :
00074 
00075 =cut
00076 
00077 sub fetch_all {
00078   my ($self) = @_;
00079 
00080   my $table = $self->tables->[0]->[1];
00081   my $constraint = "WHERE $table.node_id = $table.root_id";
00082   my $nodes = $self->_generic_fetch($constraint);
00083 
00084   return $nodes;
00085 }
00086 
00087 sub fetch_node_by_node_id {
00088   my ($self, $node_id) = @_;
00089 
00090   if (! defined $node_id) {
00091     throw("node_id is undefined")
00092   }
00093 
00094   my $table= $self->tables->[0]->[1];
00095   my $constraint = "WHERE $table.node_id = $node_id";
00096   my ($node) = @{$self->_generic_fetch($constraint)};
00097   return $node;
00098 }
00099 
00100 
00101 sub fetch_parent_for_node {
00102   my ($self, $node) = @_;
00103 
00104   unless($node->isa('Bio::EnsEMBL::Compara::NestedSet')) {
00105     throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
00106   }
00107 
00108   my $table= $self->tables->[0]->[1];
00109   my $constraint = "WHERE $table.node_id = " . $node->_parent_id;
00110   my ($parent) = @{$self->_generic_fetch($constraint)};
00111   return $parent;
00112 }
00113 
00114 
00115 sub fetch_all_children_for_node {
00116   my ($self, $node) = @_;
00117 
00118   unless($node->isa('Bio::EnsEMBL::Compara::NestedSet')) {
00119     throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
00120   }
00121 
00122   my $constraint = "WHERE parent_id = " . $node->node_id;
00123   my $kids = $self->_generic_fetch($constraint);
00124   foreach my $child (@{$kids}) { $node->add_child($child); }
00125 
00126   return $node;
00127 }
00128 
00129 sub fetch_all_leaves_indexed {
00130   my ($self, $node) = @_;
00131 
00132   unless($node->isa('Bio::EnsEMBL::Compara::NestedSet')) {
00133     throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
00134   }
00135 
00136   my $table= $self->tables->[0]->[1];
00137   my $left_index = $node->left_index;
00138   my $right_index = $node->right_index;
00139   my $root_id = $node->_root_id;
00140   my $constraint = "WHERE ($table.root_id = $root_id) AND (($table.right_index - $table.left_index) = 1) AND ($table.left_index > $left_index) AND ($table.right_index < $right_index)";
00141   my @leaves = @{$self->_generic_fetch($constraint)};
00142 
00143   return \@leaves;
00144 }
00145 
00146 sub fetch_subtree_under_node {
00147   my $self = shift;
00148   my $node = shift;
00149 
00150   unless($node->isa('Bio::EnsEMBL::Compara::NestedSet')) {
00151     throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
00152   }
00153 
00154   unless ($node->left_index && $node->right_index) {
00155     warning("fetch_subtree_under_node subroutine assumes that left and right index has been built and store in the database.\n This does not seem to be the case for node_id=".$node->node_id.". Returning node.\n");
00156     return $node;
00157   }
00158 
00159   my $alias = $self->tables->[0]->[1];
00160 
00161   my $left_index = $node->left_index;
00162   my $right_index = $node->right_index;
00163   my $root_id = $node->_root_id;
00164   my $constraint = "WHERE ($alias.root_id = $root_id) AND ($alias.left_index >= $left_index) AND ($alias.right_index <= $right_index)";
00165   my $all_nodes = $self->_generic_fetch($constraint);
00166   push @{$all_nodes}, $node;
00167   $self->_build_tree_from_nodes($all_nodes);
00168   return $node;
00169 }
00170 
00171 
00172 sub fetch_tree_at_node_id {
00173   my $self = shift;
00174   my $node_id = shift;
00175 
00176   if (! defined $node_id) {
00177     throw ("node_id is undefined");
00178   }
00179 
00180   my $node = $self->fetch_node_by_node_id($node_id);
00181 
00182   return $self->fetch_subtree_under_node($node);
00183 }
00184 
00185 
00186 sub fetch_all_roots {
00187   my $self = shift;
00188 
00189   my $constraint = "WHERE t.root_id = 0";
00190   return $self->_generic_fetch($constraint);
00191 }
00192 
00193 # UNUSED
00194 sub fetch_subroot_by_left_right_index {
00195   my ($self,$node) = @_;
00196 
00197   unless ($node->left_index && $node->right_index) {
00198     warning("fetch_subroot_by_left_right_index subroutine assumes that left and right index has been built and store in the database.\n This does not seem to be the case.\n");
00199   }
00200   my $left_index = $node->left_index;
00201   my $right_index = $node->right_index;
00202   my $root_id = $node->_root_id;
00203 
00204   my $constraint = "WHERE parent_id = $root_id";
00205   $constraint .= " AND left_index<=$left_index";
00206   $constraint .= " AND right_index>=$right_index";
00207   return $self->_generic_fetch($constraint)->[0];
00208 }
00209 
00210 
00211 =head2 fetch_root_by_node
00212 
00213   Arg [1]    : Bio::EnsEMBL::Compara::NestedSet $node
00214   Example    : $root = $nested_set_adaptor->fetch_root_by_node($node);
00215   Description: Returns the root of the tree for this node
00216                with links to all the intermediate nodes. Sister nodes
00217                are not included in the result. Use fetch_node_by_node_id()
00218                method to get the whole tree (loaded on demand)
00219   Returntype : Bio::EnsEMBL::Compara::NestedSet
00220   Exceptions : thrown if $node is not defined
00221   Status     : At-risk
00222   Caller     : $nested_set->root
00223 
00224 =cut
00225 sub fetch_root_by_node {
00226   my ($self, $node) = @_;
00227 
00228   unless(UNIVERSAL::isa($node, 'Bio::EnsEMBL::Compara::NestedSet')) {
00229     throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
00230   }
00231 
00232   my $alias = $self->tables->[0]->[1];
00233 
00234   my $left_index = $node->left_index;
00235   my $right_index = $node->right_index;
00236   my $root_id = $node->_root_id;
00237 
00238   my $constraint = "WHERE ($alias.root_id = $root_id) AND ($alias.left_index <= $left_index) AND ($alias.right_index >= $right_index)";
00239   my $nodes = $self->_generic_fetch($constraint);
00240   my $root = $self->_build_tree_from_nodes($nodes);
00241 
00242   return $root;
00243 }
00244 
00245 
00246 =head2 fetch_first_shared_ancestor_indexed
00247 
00248   Arg [1]    : Bio::EnsEMBL::Compara::NestedSet $node1
00249   Arg [2]    : Bio::EnsEMBL::Compara::NestedSet $node2
00250   Arg [n]    : Bio::EnsEMBL::Compara::NestedSet $node_n
00251   Example    : $lca = $nested_set_adaptor->fetch_first_shared_ancestor_indexed($node1, $node2);
00252   Description: Returns the first node of the tree that is an ancestor of all the nodes passed
00253                as arguments. There must be at least one argument, and all the nodes must share
00254                the same root
00255   Returntype : Bio::EnsEMBL::Compara::NestedSet
00256   Exceptions : thrown if the nodes don't share the same root_id
00257 
00258 =cut
00259 sub fetch_first_shared_ancestor_indexed {
00260   my $self = shift;
00261   
00262   my $node1 = shift;
00263   my $root_id = $node1->_root_id;
00264   my $min_left = $node1->left_index;
00265   my $max_right = $node1->right_index;
00266 
00267   while (my $node2 = shift) {
00268     if ($node2->_root_id != $root_id) {
00269       throw("Nodes must have the same root in fetch_first_shared_ancestor_indexed ($root_id != ".($node2->_root_id).")\n");
00270     }
00271     $min_left = $node2->left_index if $node2->left_index < $min_left;
00272     $max_right = $node2->right_index if $node2->right_index > $max_right;
00273   }
00274 
00275   my $alias = $self->tables->[0]->[1];
00276   my $constraint = "WHERE $alias.root_id=$root_id AND $alias.left_index <= $min_left";
00277   $constraint .= " AND $alias.right_index >= $max_right";
00278   my $final = " ORDER BY ($alias.right_index-$alias.left_index) LIMIT 1";
00279   
00280   my $ancestor = $self->_generic_fetch($constraint, '', $final)->[0];
00281   return $ancestor;
00282 }
00283 
00284 
00285 
00286 ###########################
00287 # STORE methods
00288 ###########################
00289 
00290 sub update {
00291   my ($self, $node) = @_;
00292 
00293   unless(UNIVERSAL::isa($node, 'Bio::EnsEMBL::Compara::NestedSet')) {
00294     throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
00295   }
00296 
00297   my $parent_id = 0;
00298   if($node->parent) {
00299     $parent_id = $node->parent->node_id ;
00300   }
00301   my $root_id = $node->root->node_id;
00302 
00303  my $table= $self->tables->[0]->[0];
00304   my $sql = "UPDATE $table SET ".
00305                "parent_id=$parent_id".
00306                ",root_id=$root_id".
00307                ",left_index=" . $node->left_index .
00308                ",right_index=" . $node->right_index .
00309                ",distance_to_parent=" . $node->distance_to_parent .
00310              " WHERE $table.node_id=". $node->node_id;
00311 
00312   $self->dbc->do($sql);
00313 }
00314 
00315 
00316 sub update_subtree {
00317   my $self = shift;
00318   my $node = shift;
00319 
00320   $self->update($node);
00321 
00322   foreach my $child (@{$node->children}) {
00323     $self->update_subtree($child);
00324   }
00325 }
00326 
00327 =head2 store
00328 
00329   Arg [1]    :
00330   Example    :
00331   Description:
00332   Returntype :
00333   Exceptions :
00334   Caller     :
00335 
00336 =cut
00337 
00338 sub store {
00339   my ($self, $node) = @_;
00340 
00341   throw("must subclass and provide correct table names");
00342 
00343   unless($node->isa('Bio::EnsEMBL::Compara::NestedSet')) {
00344     throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
00345   }
00346 
00347   my $sth = $self->prepare("INSERT INTO tree_node (parent_id, name) VALUES (?,?)");
00348   if(defined($node->parent_node)) {
00349     $sth->execute($node->parent_node->dbID, $node->name);
00350   } else {
00351     $sth->execute(0, $node->name);
00352   }
00353   $node->dbID( $sth->{'mysql_insertid'} );
00354   $node->adaptor($self);
00355   $sth->finish;
00356 
00357   #
00358   #now recursively do all the children
00359   #
00360   my $children = $node->children_nodes;
00361   foreach my $child_node (@$children) {
00362     $self->store($child_node);
00363   }
00364 
00365   return $node->dbID;
00366 }
00367 
00368 =head2 sync_tree_leftright_index
00369 
00370   Arg [1]    : Bio::EnsEMBL::Compara::NestedSet $root
00371   Arg [2]    : Boolean; indicates if you wish to use a fresh database 
00372                connection to perform any locking. If you are within an existing
00373                transaction this is a good idea to avoid locking the LR table
00374                for the duration of your transaction
00375   Example    : $nsa->sync_tree_leftright_index($root);
00376   Description: For the given root this method looks for left right index
00377                offset recorded in lr_index_offset for the configured
00378                table. The program locks on this table to reserve a batch
00379                of identifiers which are then used to left_right index
00380                the tree.
00381 
00382                The left right indexing is called by this method on your given
00383                tree root
00384   Returntype : Nothing
00385   Exceptions : Only raised from DBI problems
00386   Caller     : Public
00387 
00388 =cut
00389 
00390 sub sync_tree_leftright_index {
00391   my ($self, $tree_root, $use_fresh_connection) = @_;
00392   my $starting_lr_index = $self->_get_starting_lr_index($tree_root, $use_fresh_connection);
00393   $tree_root->build_leftright_indexing($starting_lr_index);
00394   return;
00395 }
00396 
00397 ##
00398 ## Offset is pre-calculated by taking the number of nodes in the tree
00399 ## and multiplying by 2. This is then stored & passed back to
00400 ## sync_tree_leftright_index()
00401 ##
00402 sub _get_starting_lr_index {
00403   my ($self, $tree_root, $use_fresh_connection) = @_;
00404 
00405   my $table = $self->_lr_table_name();
00406   my $node_count = scalar(@{$tree_root->get_all_nodes()});
00407   my $lr_ids_needed = $node_count*2;
00408   
00409   my $select_sql = 'SELECT lr_index_offset_id, lr_index FROM lr_index_offset WHERE table_name =? FOR UPDATE';
00410   my $update_sql = 'UPDATE lr_index_offset SET lr_index =? WHERE lr_index_offset_id =?';
00411 
00412   my $conn = ($use_fresh_connection) ?
00413     Bio::EnsEMBL::DBSQL::DBConnection->new(-DBCONN => $self->dbc()) :
00414     $self->dbc();
00415   my $h = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $conn);
00416 
00417   my $starting_lr_index;
00418   #Retry because this *cannot* fail due to NJTREE -> QuickTreeBreak flow
00419   $h->transaction(
00420     -RETRY => 3,
00421     -CONDITION => sub {
00422       my ($error) = @_;
00423       return ( $error =~ /deadlock/i ) ? 1 : 0;
00424     },
00425     -CALLBACK => sub {
00426       my $rows = $h->execute(-SQL => $select_sql, -PARAMS => [$table]);
00427       if(!@{$rows}) {
00428         throw("The table '${table}' does not have an entry in lr_index_offset");
00429       }
00430       my ($id, $max) = @{$rows->[0]};
00431       $starting_lr_index = $max+1;
00432       my $new_max = $max+$lr_ids_needed;
00433       $h->execute_update(-SQL => $update_sql, -PARAMS => [$new_max, $id]);
00434       return;
00435     }
00436   );
00437   
00438   $conn->disconnect_if_idle() if($use_fresh_connection);
00439 
00440   return $starting_lr_index;
00441 }
00442 
00443 sub _lr_table_name {
00444   my ($self) = @_;
00445   return $self->tables->[0]->[0];
00446 }
00447 
00448 ##################################
00449 #
00450 # Database related methods, sublcass overrides/inherits
00451 #
00452 ##################################
00453 
00454 sub tables {
00455   my $self = shift;
00456   throw("must subclass and provide correct table names");
00457 }
00458 
00459 sub columns {
00460   my $self = shift;
00461   throw("must subclass and provide correct column names");
00462 }
00463 
00464 sub left_join_clause {
00465   return "";
00466 }
00467 
00468 sub default_where_clause {
00469   my $self = shift;
00470   return '';
00471 }
00472 
00473 sub final_clause {
00474   my $self = shift;
00475   $self->{'final_clause'} = shift if(@_);
00476   return $self->{'final_clause'};
00477 }
00478 
00479 
00480 sub create_instance_from_rowhash {
00481   my $self = shift;
00482   my $rowhash = shift;
00483 
00484   #my $node = $self->cache_fetch_by_id($rowhash->{'node_id'});
00485   #return $node if($node);
00486 
00487   my $node = new Bio::EnsEMBL::Compara::NestedSet;
00488   $self->init_instance_from_rowhash($node, $rowhash);
00489 
00490   #$self->cache_add_object($node);
00491 
00492   return $node;
00493 }
00494 
00495 
00496 sub init_instance_from_rowhash {
00497   my $self = shift;
00498   my $node = shift;
00499   my $rowhash = shift;
00500 
00501   $node->adaptor($self);
00502   $node->node_id               ($rowhash->{'node_id'});
00503   $node->_parent_id            ($rowhash->{'parent_id'});
00504   $node->_root_id              ($rowhash->{'root_id'});
00505   $node->left_index            ($rowhash->{'left_index'});
00506   $node->right_index           ($rowhash->{'right_index'});
00507   $node->distance_to_parent    ($rowhash->{'distance_to_parent'});
00508 
00509   return $node;
00510 }
00511 
00512 
00513 ##################################
00514 #
00515 # INTERNAL METHODS
00516 #
00517 ##################################
00518 
00519 sub new {
00520   my $class = shift;
00521 
00522   my $self = $class->SUPER::new(@_);
00523 
00524   $self->{'_node_cache'} = [];
00525   return $self;
00526 }
00527 
00528 sub DESTROY {
00529   my $self = shift;
00530   $self->clear_cache;
00531   $self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
00532 }
00533 
00534 sub cache_fetch_by_id {
00535   my $self = shift;
00536   my $node_id = shift;
00537 
00538   for(my $index=0; $index<scalar(@{$self->{'_node_cache'}}); $index++) {
00539     my $node = $self->{'_node_cache'}->[$index];
00540     if($node->node_id == $node_id) {
00541       splice(@{$self->{'_node_cache'}}, $index, 1); #removes from list
00542       unshift @{$self->{'_node_cache'}}, $node; #put at front of list
00543       return $node;
00544     }
00545   }
00546   return undef;
00547 }
00548 
00549 
00550 sub cache_add_object
00551 {
00552   my $self = shift;
00553   my $node = shift;
00554 
00555   unshift @{$self->{'_node_cache'}}, $node; #put at front of list
00556   while(scalar(@{$self->{'_node_cache'}}) > 3000) {
00557     my $old = pop @{$self->{'_node_cache'}};
00558     #print("shrinking cache : "); $old->print_node;
00559   }
00560   return undef;
00561 }
00562 
00563 sub clear_cache {
00564   my $self = shift;
00565 
00566   $self->{'_node_cache'} = [];
00567   return undef;
00568 }
00569 
00570 sub _build_tree_from_nodes {
00571   my $self = shift;
00572   my $node_list = shift;
00573 
00574   #first hash all the nodes by id for fast access
00575   my %node_hash;
00576   foreach my $node (@{$node_list}) {
00577     $node->no_autoload_children;
00578     $node_hash{$node->node_id} = $node;
00579   }
00580 
00581   #next add children to their parents
00582   my $root = undef;
00583   foreach my $node (@{$node_list}) {
00584     my $parent = $node_hash{$node->_parent_id};
00585     if($parent) { $parent->add_child($node); }
00586     else { $root = $node; }
00587   }
00588   return $root;
00589 }
00590 
00591 
00592 ###################################
00593 #
00594 # _generic_fetch system
00595 #
00596 #####################################
00597 
00598 =head2 _generic_fetch
00599 
00600   Arg [1]    : (optional) string $constraint
00601                An SQL query constraint (i.e. part of the WHERE clause)
00602   Arg [2]    : (optional) string $logic_name
00603                the logic_name of the analysis of the features to obtain
00604   Example    : $fts = $a->_generic_fetch('WHERE contig_id in (1234, 1235)', 'Swall');
00605   Description: Performs a database fetch and returns feature objects in
00606                contig coordinates.
00607   Returntype : listref of Bio::EnsEMBL::SeqFeature in contig coordinates
00608   Exceptions : none
00609   Caller     : BaseFeatureAdaptor, ProxyDnaAlignFeatureAdaptor::_generic_fetch
00610 
00611 =cut
00612 
00613 sub _generic_fetch {
00614   my ($self, $constraint, $join, $final_clause) = @_;
00615 
00616   my $sql = $self->_construct_sql_query($constraint, $join, $final_clause);
00617 
00618 #  print STDERR $sql,"\n";
00619   my $node_list = [];
00620   my $sth = $self->prepare($sql);
00621   $sth->execute;
00622   $node_list = $self->_objs_from_sth($sth);
00623   $sth->finish;
00624 
00625   return $node_list;
00626 }
00627 
00628 sub _construct_sql_query {
00629   my ($self, $constraint, $join, $final_clause) = @_;
00630 
00631   my @tables = @{$self->tables};
00632   my $columns = join(', ', @{$self->columns()});
00633 
00634   my $default_where = $self->default_where_clause;
00635   if($default_where) {
00636     if($constraint) {
00637       $constraint .= " AND $default_where ";
00638     } else {
00639       $constraint = " WHERE $default_where ";
00640     }
00641   }
00642 
00643   if ($join) {
00644     foreach my $single_join (@{$join}) {
00645       my ($tablename, $condition, $extracolumns) = @{$single_join};
00646       if ($tablename && $condition) {
00647         push @tables, $tablename;
00648 
00649         if($constraint) {
00650           $constraint .= " AND $condition";
00651         } else {
00652           $constraint = " WHERE $condition";
00653         }
00654       }
00655       if ($extracolumns) {
00656         $columns .= ", " . join(', ', @{$extracolumns});
00657       }
00658     }
00659   }
00660 
00661   #construct a nice table string like 'table1 t1, table2 t2'
00662   my $tablenames = join(', ', map({ join(' ', @$_) } @tables));
00663 
00664   my $sql = "SELECT $columns FROM $tablenames";
00665   $sql .= " ". $self->left_join_clause;
00666   $sql .= " $constraint" if($constraint);
00667 
00668   #append additional clauses which may have been defined
00669   if (!$final_clause) {
00670     $final_clause = $self->final_clause;
00671   }
00672   $sql .= " $final_clause" if($final_clause);
00673 
00674   return $sql;
00675 }
00676 
00677 
00678 1;