Archive Ensembl HomeArchive Ensembl Home
ProjectionEngine.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 
00005 =pod
00006 
00007 =head1 NAME
00008 
00009 Bio::EnsEMBL::Compara::Production::Projection::ProjectionEngine
00010 
00011 =head1 DESCRIPTION
00012 
00013 This is a re-implementation of the code currently held in Ensembl's core
00014 API checkout which can project DBEntries from one species to another
00015 by using the Homologies projected from the Compara Genomics GeneTree
00016 pipeline. Normally this is used to project GO terms from a well annotated
00017 species to one which is not.
00018 
00019 Ensembl's original implementation involved a monolithic script with
00020 no scope for customisation. This implementation attempts to be as pluggable
00021 as possible by leveraging L<Data::Predicate>s (a way of encapsulating logic
00022 to allow a user to specify their own filters). This means the algorithm 
00023 for projection becomes
00024 
00025 =over 8
00026 
00027 =item Get all homologies projected between two species (one given at construction, the other when we run C<project>)
00028 
00029 =item Filter them for allowed linkage using C<homology_predicate()> e.g. filter on allowed mappings or percentage identity limits
00030 
00031 =item Loop through these homologies
00032 
00033 =item For each member of the homology get the DBEntry objects from the core database (delegates to Bio::EnsEMBL::Compara::Production::Projection::FakeXrefHolder)
00034 
00035 =item For each source DBEntry filter out using C<db_entry_predicate()> ensuring we want to work with this DBEntry type
00036 
00037 =item If we still have a DBEntry then make sure the target does not already have the DBEntry linked to it
00038 
00039 =item If still okay then build a C<Bio::EnsEMBL::Compara::Production::Projection::Projection> object based on this
00040 
00041 =item Return an ArrayRef of these projection objects 
00042 
00043 =back
00044 
00045 The main way to cut into this procedure is to give your own predicates
00046 during construction or to extend this module & reimplement the builder methods.
00047 
00048 =head1 CAVEATS
00049 
00050 =over 8
00051 
00052 =item This version is designed for a basic plant transfer algorithm
00053 
00054 =item We must consult both GO and PO External DBs because plant databases have mixed usage of these types
00055 
00056 =item We only project GOs
00057 
00058 =back
00059 
00060 =head1 AUTHOR
00061 
00062 Andy Yates (ayatesatebiacuk)
00063 
00064 =head1 CONTACT
00065 
00066 This module is part of the EnsEMBL project (http://www.ensembl.org)
00067 
00068 Questions can be posted to the dev mailing list: dev@ensembl.org
00069 
00070 =cut
00071 
00072 package Bio::EnsEMBL::Compara::Production::Projection::ProjectionEngine;
00073 
use strict;
use warnings;

use Carp qw(confess);

use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);

use Data::Predicate::Predicates qw(:all);
use Bio::EnsEMBL::Compara::Production::Projection::Projection;
00081 
=head2 new()

  Arg[-dbentry_predicate] : Predicate used to filter out DBEntry instances.
                            Optional; when absent it is built on first use
                            by C<dbentry_predicate()>
  Arg[-homology_predicate] : Predicate used to filter out Homology instances.
                            Optional; when absent it is built on first use
                            by C<homology_predicate()>
  Arg[-log] : Logger instance. Can be a Log::Log4perl::Logger instance or a
              class which implements the methods this module calls on it
              (info(), debug(), trace() and is_trace()). Defaults to the
              result of C<_log_builder()>
  Arg[-dba] : required; Compara adaptor to get homologies from
  Arg[-method_link_type] : Method link to get homologies from. Defaults to
                           the result of C<_method_link_type_builder()>
  Arg[-genome_db] : required; GenomeDB to use as the source of the homologies
  Description : New method used for a new instance of the given object. 
                Required fields are indicated accordingly. Fields are specified
                using the Arguments syntax (case insensitive).
  Exceptions  : If a required argument is missing, if an argument is not of
                the expected type or if no logger could be obtained

=cut

sub new {
  my ( $class, @args ) = @_;
  my $self = bless( {}, ref($class) || $class );
  my (  $dbentry_predicate, $homology_predicate, $log, $dba,
        $method_link_type, $genome_db ) = rearrange([ qw(
      dbentry_predicate homology_predicate log 
      dba method_link_type genome_db )
  ], @args);

  # Predicates are optional. The key is deliberately left absent when no
  # predicate was given so the lazy getters can detect it with exists().
  if ( defined $dbentry_predicate ) {
    assert_ref( $dbentry_predicate, 'Data::Predicate' );
    $self->{dbentry_predicate} = $dbentry_predicate;
  }

  if ( defined $homology_predicate ) {
    assert_ref( $homology_predicate, 'Data::Predicate' );
    $self->{homology_predicate} = $homology_predicate;
  }

  $log = $self->_log_builder() if !defined $log;
  confess('The attribute log must be specified during construction or provide a builder subroutine') if !defined $log;
  $self->{log} = $log;

  # Check for presence before checking the type; otherwise the type
  # assertion fires on undef and the more helpful message is never seen.
  confess('The attribute dba must be specified during construction or provide a builder subroutine') if !defined $dba;
  assert_ref( $dba, 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor' );
  $self->{dba} = $dba;

  $method_link_type = $self->_method_link_type_builder()
    if !defined $method_link_type;
  $self->{method_link_type} = $method_link_type if defined $method_link_type;

  confess('The attribute genome_db must be specified during construction or provide a builder subroutine' ) if !defined $genome_db;
  assert_ref( $genome_db, 'Bio::EnsEMBL::Compara::GenomeDB' );
  $self->{genome_db} = $genome_db;

  return $self;
}
00133 
=head2 dbentry_predicate()

  Description : Getter. Predicate used to filter out DBEntry instances.
  When no predicate has been stored yet it is created once through
  C<_dbentry_predicate_builder>(), which can be overridden to customise it.

=cut

sub dbentry_predicate {
  my $self = shift;

  # Build on first access only; the stored value is reused afterwards
  $self->{dbentry_predicate} = $self->_dbentry_predicate_builder()
    unless exists $self->{dbentry_predicate};

  return $self->{dbentry_predicate};
}
00149 
=head2 homology_predicate()

  Description : Getter. Predicate used to filter out Homology instances.
  When no predicate has been stored yet it is created once through
  C<_homology_predicate_builder>(), which can be overridden to customise it.

=cut

sub homology_predicate {
  my $self = shift;

  # Build on first access only; the stored value is reused afterwards
  $self->{homology_predicate} = $self->_homology_predicate_builder()
    unless exists $self->{homology_predicate};

  return $self->{homology_predicate};
}
00164 
=head2 log()

  Description : Getter. Returns the logger instance stored at construction

=cut

sub log {
  my $self = shift;
  return $self->{log};
}
00175 
=head2 dba()

  Description : Getter. Returns the Compara adaptor homologies are
                fetched from

=cut

sub dba {
  my $self = shift;
  return $self->{dba};
}
00186 
=head2 method_link_type()

  Description : Getter. Method link type used when looking up homologies.
  The value is fixed at construction; when none is given there the
  constructor takes it from C<_method_link_type_builder>(), which returns
  ENSEMBL_ORTHOLOGUES unless overridden.

=cut

sub method_link_type {
  my $self = shift;
  return $self->{method_link_type};
}
00199 
=head2 genome_db()

  Description : Getter. Returns the GenomeDB used as the source of the
                homologies

=cut

sub genome_db {
  my $self = shift;
  return $self->{genome_db};
}
00210 
00211 
00212 ######BUILDERS
00213 
# Supplies the method link type used when none is passed to the
# constructor. Override in a subclass to change the default.
sub _method_link_type_builder {
  my $self = shift;
  my $default_type = 'ENSEMBL_ORTHOLOGUES';
  return $default_type;
}
00218 
# Remembers whether Log::Log4perl has already been loaded so the require
# is only attempted once per process.
my $imported_log4p = 0;

# Builds the default logger used when none is passed to the constructor.
# Log::Log4perl is loaded on demand so it is only needed when this
# default is actually used.
#
# Returns the Log::Log4perl logger registered under this package name.
# Dies (with a stack trace) if Log::Log4perl cannot be loaded.
sub _log_builder {
  my ($self) = @_;
  if(! $imported_log4p) {
    # Block eval avoids compiling a string at runtime; the trailing 1
    # makes a successful load distinguishable from a failed one.
    my $loaded = eval { require Log::Log4perl; 1 };
    if(! $loaded) {
      confess('Cannot build a logger because Log::Log4perl is not available. Detected error: '.$@);
    }
    $imported_log4p = 1;
  }
  return Log::Log4perl->get_logger(__PACKAGE__);
}
00232 
# Hook for supplying the default Homology predicate. This base class has
# no default, so it always dies; subclasses override it to return the
# predicate that homology_predicate() should hand out.
sub _homology_predicate_builder {
  my ($self) = @_;
  confess('Override to provide a default Homology predicate');
}
00237 
# Hook for supplying the default DBEntry predicate. This base class has
# no default, so it always dies; subclasses override it to return the
# predicate that dbentry_predicate() should hand out.
sub _dbentry_predicate_builder {
  my ($self) = @_;
  confess('Override to provide a default DBEntry predicate');
}
00242 
00243 ######LOGIC
00244 
=head2 project()

  Arg[0]      : GenomeDB object which is used as the projection target
  Description : Workhorse subroutine which loops through homologies and filters
                through those and DBEntry objects using L<Data::Predicate>
                objects. See class description for more information on the
                filtering process.
  Returntype  : ArrayRef of the objects returned by C<build_projection()>;
                undefined results are left out
  Exceptions  : If we cannot contact the target databases 

=cut

sub project {
  my ($self, $target_genome_db) = @_;

  my $log = $self->log();
  $log->info('Processing '.$self->genome_db()->name().' Vs. '.$target_genome_db->name());

  my $mlss = $self->_get_mlss($target_genome_db);
  my $homologies = $self->_homologies($mlss);
  my @projections;

  $log->info('Looping over '.scalar(@{$homologies}).' homologies');
  foreach my $homology (@{$homologies}) {
    my ($query_member, $query_attribute, $target_member, $target_attribute) = $self->_decode_homology($homology);

    if($self->log()->is_trace()) {
      $log->trace(sprintf('Projecting from %s to %s', $query_member->stable_id(), $target_member->stable_id()));
    }

    my $query_dbentry_holder = $self->dbentry_source_object($query_member);
    my $target_dbentry_holder = $self->dbentry_source_object($target_member);

    foreach my $dbentry (@{$query_dbentry_holder->get_all_DBEntries()}) {
      $log->trace(sprintf('Working with %s from external db %s', $dbentry->primary_id(), $dbentry->dbname()))
        if $log->is_trace();

      # First gate: is this a DBEntry we want to work with at all?
      if(! $self->_filter_dbentry($dbentry, $target_dbentry_holder)) {
        $log->trace('Fails DBEntry filter') if $log->is_trace();
        next;
      }
      $log->trace('Passes DBEntry filter') if $log->is_trace();

      # Second gate: compare against what the target already carries
      my $transferable = $self->_transfer_dbentry_by_targets($dbentry, $target_dbentry_holder->get_all_DBEntries(), $target_member->stable_id());
      if(! $transferable) {
        $log->trace('Failed target entry transfer; check target for existing annotation or better quality annotation')
          if $log->is_trace();
        next;
      }

      $log->trace('DBEntry will be transferred');
      my $projection = $self->build_projection($query_member, $target_member, $query_attribute, $target_attribute, $dbentry, $homology);
      # build_projection() is allowed to return undef to skip an entry
      push(@projections, $projection) if defined $projection;
    }
  }

  $log->info('Finished homology and have found '.scalar(@projections).' projection(s)');

  return \@projections;
}
00318 
=head2 build_projection()

  Arg[1]      : Member; source member of projection
  Arg[2]      : Member; target member of projection
  Arg[3]      : Source attribute
  Arg[4]      : Target attribute
  Arg[5]      : DBEntry projected
  Arg[6]      : The homology used for projection
  Description : Provides an abstraction to building a projection from a 
                set of elements. The canonical peptide members, the two
                perc_id() values and the homology description are handed
                to the Projection constructor.
  Returntype  : Projection object. Can be null & the current projection code
                will ignore it

=cut

sub build_projection {
  my ($self, $query_member, $target_member, $query_attribute, $target_attribute, $dbentry, $homology) = @_;

  # Collect the constructor arguments first so the call itself stays short
  my @projection_args = (
    -ENTRY         => $dbentry,
    -FROM          => $query_member->get_canonical_peptide_Member(),
    -TO            => $target_member->get_canonical_peptide_Member(),
    -FROM_IDENTITY => $query_attribute->perc_id(),
    -TO_IDENTITY   => $target_attribute->perc_id(),
    -TYPE          => $homology->description(),
  );

  return Bio::EnsEMBL::Compara::Production::Projection::Projection->new(@projection_args);
}
00345 
# Looks up the MethodLinkSpeciesSet linking the source GenomeDB of this
# engine with the given target GenomeDB for the configured method link
# type. Returns whatever the adaptor returns for that lookup.
sub _get_mlss {
  my ($self, $target_genome_db) = @_;

  my $adaptor = $self->dba()->get_MethodLinkSpeciesSetAdaptor();
  my @lookup_args = (
    $self->method_link_type(),
    [$self->genome_db(), $target_genome_db],
  );
  my $mlss = $adaptor->fetch_by_method_link_type_GenomeDBs(@lookup_args);

  return $mlss;
}
00353 
# Fetches every homology for the given MethodLinkSpeciesSet and keeps the
# ones accepted by homology_predicate(). Progress is reported at debug
# level and per-homology decisions at trace level.
#
# Returns an ArrayRef of the accepted homologies in their original order.
sub _homologies {
  my ($self, $mlss) = @_;

  $self->log()->debug('Retriving homologies');
  my $candidates = $self->_get_homologies($mlss);

  $self->log()->debug('Filtering homologies');
  my $predicate = $self->homology_predicate();
  my $log = $self->log();
  # Asked once up front so the loop does not query the logger per homology
  my $tracing = $log->is_trace();

  my @accepted;
  for my $homology (@{$candidates}) {
    $log->trace(sprintf('Filtering homology %d', $homology->dbID())) if $tracing;

    my $keep = $predicate->apply($homology);
    if($tracing) {
      $log->trace($keep ? 'Accepted homology' : 'Rejected homology');
    }
    push(@accepted, $homology) if $keep;
  }

  $self->log()->debug('Finished filtering');
  return \@accepted;
}
00376 
# Decides whether a source DBEntry should be considered for projection by
# applying dbentry_predicate() to it. The target DBEntry holder is
# accepted but not used here.
sub _filter_dbentry {
  my ($self, $dbentry, $target_dbentry_holder) = @_;
  my $predicate = $self->dbentry_predicate();
  return $predicate->apply($dbentry);
}
00381 
# Decides whether a source DBEntry may be projected onto a target that
# already carries the given DBEntries.
#
#   $source  : the DBEntry being considered for projection
#   $targets : ArrayRef of DBEntries already attached to the target
#
# Callers in this module also pass the target stable id as a third
# argument; it is not used by this implementation.
#
# Returns 0 when a target entry of the same class as the source has the
# same dbname() and primary_id() (i.e. the annotation is already there),
# otherwise 1.
#
# check_ref() must be imported from Bio::EnsEMBL::Utils::Scalar by the
# enclosing file for the class comparison to work.
sub _transfer_dbentry_by_targets {
  my ($self, $source, $targets) = @_;

  my $source_ref = ref($source);

  foreach my $target_xref (@{$targets}) {
    # Only compare like with like; skip entries of a different class
    next unless check_ref($target_xref, $source_ref);
    #Reject if it was the same
    if ( $source->dbname() eq $target_xref->dbname() &&
        $source->primary_id() eq $target_xref->primary_id()) {
      return 0;
    }
  }

  return 1;
}
00398 
# Splits a homology into its source (query) and target halves.
#
# Each element returned by get_all_Member_Attribute() is dereferenced as
# an array whose first element is the member; the caller in this module
# treats the second element as the matching attribute. A member whose
# genome_db has the same dbID as this engine's source GenomeDB is the
# query side; any other member is the target side.
#
# Returns a four element list:
#   (query member, query attribute, target member, target attribute)
# A side that was not found is returned as undef in its own positions, so
# the remaining values never slide into the wrong slots.
sub _decode_homology {
  my ($self, $homology) = @_;

  # Loop invariant; no need to look it up for every member
  my $source_genome_db_id = $self->genome_db()->dbID();

  my @query;
  my @target;

  foreach my $ma (@{$homology->get_all_Member_Attribute()}) {
    my ($member) = @{$ma};
    if($member->genome_db()->dbID() == $source_genome_db_id) {
      @query = @{$ma};
    }
    else {
      @target = @{$ma};
    }
  }

  # Slices keep the positions fixed even when one of the arrays is empty
  return (@query[0,1], @target[0,1]);
}
00418 
# Fetches all homologies attached to the given MethodLinkSpeciesSet via
# the HomologyAdaptor of the Compara database adaptor. No filtering is
# done here.
sub _get_homologies {
  my $self = shift;
  my ($mlss) = @_;

  my $homology_adaptor = $self->dba()->get_HomologyAdaptor();
  $self->log()->debug('Fetching homologues');

  my $fetched = $homology_adaptor->fetch_all_by_MethodLinkSpeciesSet($mlss);
  return $fetched;
}
00426 
=head2 dbentry_source_object()

  Arg[1]      : Member to get the DBEntry objects for
  Description : Hook which must be overridden in the implementing class.
                C<project()> calls C<get_all_DBEntries()> on the object
                returned, so an implementation has to return something
                which provides that method.
  Exceptions  : Always thrown by this base implementation

=cut

sub dbentry_source_object {
  my ($self, $member) = @_;
  confess('Unsupported operation called; override in the implementing class');
}
00437 
00438 1;