Archive Ensembl HomeArchive Ensembl Home
GOAProjectionEngine.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 
00005 =pod
00006 
00007 =head1 NAME
00008 
00009 Bio::EnsEMBL::Compara::Production::Projection::GOAProjectionEngine
00010 
00011 =head1 DESCRIPTION
00012 
00013 This is an extension of the ProjectionEngine object which provides methods
00014 for filtering according to rules discussed with the GOA team at the EBI.
00015 
00016 =head1 FILTERS
00017 
00018 =head2 DBEntry Filtering
00019 
00020 DBEntry objects are filtered based on the following
00021 
00022 =over 8
00023 
00024 =item The DB name equals GO
00025 
00026 =item DBEntry is defined and isa OntologyXref
00027 
00028 =item The GO term has one of the following evidence tags; IDA IEP IGI IMP IPI
00029 
00030 =back
00031 
00032 =head2 Homology Filtering
00033 
00034 Homology objects are filtered accordingly
00035 
00036 =over 8
00037 
00038 =item The description field is set to ortholog_one2one, 
00039       ortholog_one2many or ortholog_many2many
00040       
00041 =item Percentage identity of both homologous peptides is at least 40%
00042 
00043 =back
00044 
00045 =cut
00046 
00047 package Bio::EnsEMBL::Compara::Production::Projection::GOAProjectionEngine;
00048 
00049 use strict;
00050 use warnings;
00051 
00052 use base qw( Bio::EnsEMBL::Compara::Production::Projection::ProjectionEngine );
00053 
00054 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00055 use Bio::EnsEMBL::Utils::Exception qw(throw);
00056 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
00057 
00058 use Bio::EnsEMBL::Compara::Production::Projection::FakeXrefHolder;
00059 
00060 use Data::Predicate::ClosurePredicate;
00061 use Data::Predicate::Predicates qw(:all);
00062 
=head2 new()

  Arg[-dbentry_types] : ArrayRef of DBEntry database names to project. When
                        not given the value of C<_dbentry_types_builder()>
                        is used (an ArrayRef holding GO)
  Arg[-source]        : String; the member level used for finding xrefs to
                        project. It is matched against the SOURCE_NAME of a
                        MEMBER and must be a key of C<_valid_sources()>.
                        Defaults to ENSEMBLPEP
  Description : Builds a new instance. All arguments are first handed to the
                constructor of the super class. Fields are specified using
                the Arguments syntax (case insensitive).
  Exceptions  : If -dbentry_types is not an ArrayRef or if -source is not a
                recognised source

=cut

sub new {
  my ( $class, @args ) = @_;
  my $self = $class->SUPER::new(@args);

  my ( $types, $level ) = rearrange( [ 'dbentry_types', 'source' ], @args );

  # Fall back onto the builder so sub-classes can change the default
  if ( !defined $types ) {
    $types = $self->_dbentry_types_builder();
  }
  assert_ref( $types, 'ARRAY' );
  $self->{dbentry_types} = $types;

  # Any false value (undef, empty string, 0) means "use the default level"
  $level = q{ENSEMBLPEP} unless $level;
  if ( !$self->_valid_sources()->{$level} ) {
    throw("Do not understand the source $level");
  }
  $self->{source} = $level;

  return $self;
}
00090 
=head2 source()

  Description : Getter. The member level (source name) we use to get
                DBEntries from, as given to C<new()> via -source. Read only;
                any argument passed in is ignored.
  Returntype  : String

=cut

sub source {
  my $self = shift;
  return $self->{source};
}
00102 
=head2 dbentry_types()

  Description : Getter. The DBEntry database names this engine works with,
                as stored by C<new()>. The default can be customised by
                overriding C<_dbentry_types_builder()>, which returns an
                arrayref containing GO.
  Returntype  : ArrayRef of Strings

=cut

sub dbentry_types {
  my $self = shift;
  return $self->{dbentry_types};
}
00115 
=head2 excluded_terms()

  Description : The primary IDs of terms which are removed from the projected
                items because they are deemed as not-useful. Override to
                exclude a different set of terms.
  Returntype  : ArrayRef of Strings; a new array holding GO:0005515
                (protein binding) is returned on every call

=cut

sub excluded_terms {
  my ($self) = @_;
  my @not_useful = ('GO:0005515');
  return \@not_useful;
}
00127 
=head2 dbentry_source_object()

  Arg[1]      : Member; the member to find DBEntries for
  Description : Override of the method from the super engine which uses the
                FakeXrefHolder object to get Xrefs quickly. The object
                returned responds to the C<get_all_DBEntries()> subroutine
                call returning all of those Translation based DBEntry
                objects.

                The member given is first converted to the level we perform
                projections at (see C<source()>) i.e. ENSEMBLGENE or
                ENSEMBLPEP. Only DBEntries of the types held in
                C<dbentry_types()> are requested.
  Returntype  : Whatever C<build_peptide_dbentries_from_Member()> of
                FakeXrefHolder returns

=cut

sub dbentry_source_object {
  my ($self, $member) = @_;
  my $holder_class = 'Bio::EnsEMBL::Compara::Production::Projection::FakeXrefHolder';
  my $level_member = $self->_decode_member($member);
  return $holder_class->build_peptide_dbentries_from_Member(
    $level_member,
    $self->dbentry_types()
  );
}
00145 
=head2 build_projection()

  Arg[1]      : Member; source member of projection
  Arg[2]      : Member; target member of projection
  Arg[3]      : Source attribute; must respond to C<perc_id()>
  Arg[4]      : Target attribute; must respond to C<perc_id()>
  Arg[5]      : DBEntry projected
  Arg[6]      : The homology used for projection; its C<description()> is
                recorded as the type of the projection
  Description : Provides an abstraction to building a projection from a
                set of elements. Both members are converted to the level
                this engine projects at before they are stored.
  Returntype  : Projection object. Can be null & the current projection code
                will ignore it

=cut

sub build_projection {
  my ($self, $query_member, $target_member, $query_attribute, $target_attribute, $dbentry, $homology) = @_;

  # Kept as a flat list so every value is evaluated exactly as it would be
  # when written directly in the constructor call
  my @projection_args = (
    -ENTRY         => $dbentry,
    -FROM          => $self->_decode_member($query_member),
    -TO            => $self->_decode_member($target_member),
    -FROM_IDENTITY => $query_attribute->perc_id(),
    -TO_IDENTITY   => $target_attribute->perc_id(),
    -TYPE          => $homology->description(),
  );

  return Bio::EnsEMBL::Compara::Production::Projection::Projection->new(@projection_args);
}
00172 
# Converts the given member into the member at the level this engine
# projects at (see source()).
#
#   Arg[1]     : Member; must respond to source_name() and to the accessor
#                for the configured level
#   Returntype : The member itself when its source_name() already equals the
#                configured source. Otherwise the result of
#                get_canonical_peptide_Member() (ENSEMBLPEP) or
#                gene_member() (ENSEMBLGENE) called on it
#   Exceptions : If the configured source has no known conversion. new()
#                validates the source so this should only be seen when a
#                sub-class widens _valid_sources() without extending this
#                method
sub _decode_member {
  my ($self, $member) = @_;

  # Method used to reach the wanted level from a member of another level
  my %accessor_for = (
    ENSEMBLPEP  => 'get_canonical_peptide_Member',
    ENSEMBLGENE => 'gene_member',
  );

  my $source   = $self->source();
  my $accessor = defined $source ? $accessor_for{$source} : undef;
  if ( !defined $accessor ) {
    throw( sprintf 'Do not understand the source %s', defined $source ? $source : '(undef)' );
  }

  return $member if $member->source_name() eq $source;
  return $member->$accessor();
}
00197 
00198 ###### BUILDERS
00199 
# Default for the -dbentry_types argument of new(); override in a sub-class
# to project other DBEntry types. Returns a new ArrayRef holding GO.
sub _dbentry_types_builder {
  my ($self) = @_;
  my @default_types = ('GO');
  return \@default_types;
}
00204 
# Builds the predicate deciding which Homology objects may be used for
# projection. A homology is accepted when
#   - its description is one of the ortholog types listed below AND
#   - the second element of each of the first two entries returned by
#     get_all_Member_Attribute() reports a perc_id() of at least 40
sub _homology_predicate_builder {
  my ($self) = @_;

  $self->log()->debug('Creating default Homology predicate');

  #Any one of the ortholog descriptions is good enough so it's an OR
  my @description_predicates;
  foreach my $type (qw(ortholog_one2one ortholog_one2many ortholog_many2many)) {
    push @description_predicates, p_string_equals($type, 'description');
  }
  my $orthology_predicate = p_or(@description_predicates);

  my $identity_predicate = Data::Predicate::ClosurePredicate->new(
    closure => sub {
      my ($homology) = @_;
      #Only the first two pairs are looked at; the attribute is element 1
      my ($pair_a, $pair_b) = @{$homology->get_all_Member_Attribute()};
      my $minimum_identity = 40;
      return $pair_a->[1]->perc_id() >= $minimum_identity
          && $pair_b->[1]->perc_id() >= $minimum_identity;
    },
    description => 'Filtering of homology where both members had >= 40% identity'
  );

  return p_and($orthology_predicate, $identity_predicate);
}
00222 
# Builds the predicate deciding which DBEntry objects may be projected. A
# DBEntry is accepted when it is of the right type, is not one of the
# excluded terms and only carries allowed linkage (evidence) types.
sub _dbentry_predicate_builder {
  my ($self) = @_;

  $self->log()->debug('Creating default DBEntry predicate');

  #Only accept if it is defined, was blessed, its dbname is one of
  #dbentry_types() & it is an OntologyXref object
  my @dbname_predicates = map { p_string_equals($_, 'dbname') } @{$self->dbentry_types()};
  my $dbname_predicate  = p_or(@dbname_predicates);
  my $ontology_xref_predicate = p_and(
    p_defined(),
    p_blessed(),
    $dbname_predicate,
    p_isa('Bio::EnsEMBL::OntologyXref')
  );

  #Allowed linkage types; can be any of these so it's an OR
  #  IDA Inferred from direct assay
  #  IEP Inferred from expression pattern
  #  IGI Inferred from genetic interaction
  #  IMP Inferred from mutant phenotype
  #  IPI Inferred from physical interaction

  #We do not use these
  #  IC Inferred by curator
  #  IEA Inferred from electronic annotation
  #  ISS Inferred from sequence or structural similarity
  #  NAS Non-traceable author statement
  #  ND No biological data available
  #  RCA Reviewed computational analysis
  #  TAS Traceable author statement
  my @allowed_codes = qw(IDA IEP IGI IMP IPI);
  my $allowed_code_predicate = p_or(map { p_string_equals($_) } @allowed_codes);

  #Quick closure predicate which hands every linkage type of a DBEntry to
  #the allowed predicate & asks if all of them were true
  my $only_allowed_linkage_predicate = Data::Predicate::ClosurePredicate->new(closure => sub {
    my ($dbentry) = @_;
    my $linkage_types = $dbentry->get_all_linkage_types();
    return $allowed_code_predicate->all_true($linkage_types);
  });

  #Filter the excluded terms (defaults to protein_binding GO:0005515)
  my @excluded_predicates = map { p_string_equals($_, 'primary_id') } @{$self->excluded_terms()};
  my $not_excluded_predicate = p_not(p_or(@excluded_predicates));

  #Build it together & return
  return p_and(
    $ontology_xref_predicate,
    $not_excluded_predicate,
    $only_allowed_linkage_predicate
  );
}
00263 
00264 ############### LOGIC
00265 
=pod

Override to provide more specific rules about allowing go xref transfer
based on evidence tags.

  Arg[1]      : DBEntry; the source xref which is a candidate for projection
  Arg[2]      : ArrayRef of DBEntry; the xrefs to compare the source against
  Returntype  : Boolean; 0 if the source must not be projected, 1 otherwise

Targets which do not pass C<check_ref()> against the class of the source are
skipped. The source is rejected as soon as one of the remaining targets

  * has the same dbname, primary id and set of linkage types as the source
  * has the same primary id as the source and carries the IEA linkage type

=cut

sub _transfer_dbentry_by_targets {
  my ($self, $source, $targets) = @_;

  my $source_ref = ref($source);

  # String form of the linkage types of the xref handed in. Sorted so the
  # order the types are stored in does not influence the comparison. Must
  # read from its argument; reading from $source made every comparison of
  # source against target trivially true.
  my $link_join = sub {
    my ($xref) = @_;
    return join(q{}, sort @{$xref->get_all_linkage_types()});
  };

  foreach my $target_xref (@{$targets}) {

    next unless check_ref($target_xref, $source_ref);

    # Both rejection rules need the same primary id so skip early
    next unless $source->primary_id() eq $target_xref->primary_id();

    #Reject if it was the same
    if ( $source->dbname() eq $target_xref->dbname() ) {
      my $linkage_join = $link_join->($source);
      if ( $linkage_join eq $link_join->($target_xref) ) {
        if ( $self->log()->is_trace() ) {
          $self->log()->trace(sprintf(
            'Rejecting because target entity had a DBEntry (%d) with the same dbnames, primary ids & linkage type (%s) as the source DBEntry (%d)',
            $target_xref->dbID(), $linkage_join, $source->dbID()
          ));
        }
        return 0;
      }
    }

    # if a GO term with the same accession, but IEA evidence code, exists, also don't project, as this
    # will lead to duplicates when the projected term has its evidence code changed to IEA after projection
    if ( grep { $_ eq 'IEA' } @{$target_xref->get_all_linkage_types()} ) {
      if ( $self->log()->is_trace() ) {
        $self->log()->trace(sprintf('Rejecting because %s is already projected by IEA',
          $target_xref->primary_id()
        ));
      }
      return 0;
    }
  }

  return 1;
}
00322 
# Lookup of the values accepted for the -source argument of new(). Returns
# a new HashRef keyed by source name; every value is 1.
sub _valid_sources {
  my ($self) = @_;
  return {
    ENSEMBLGENE => 1,
    ENSEMBLPEP  => 1,
  };
}
00328 
00329 1;