Archive Ensembl HomeArchive Ensembl Home
CheckGenomeReuse.pm
Go to the documentation of this file.
00001 
00002 =pod 
00003 
00004 =head1 NAME
00005 
00006 Bio::EnsEMBL::Compara::RunnableDB::ProteinTrees::FromScratch::CheckGenomeReuse
00007 
00008 =head1 DESCRIPTION
00009 
00010 This Runnable checks whether the data of a given genome_db can be reused for the purposes of the ProteinTrees pipeline.
00011 
00012 The format of the input_id follows the format of a Perl hash reference.
00013 Example:
00014     { 'genome_db_id' => 90 }
00015 
00016 supported keys:
00017     'genome_db_id'  => <number>
00018         the id of the genome to be checked (main input_id parameter)
00019         
00020     'release'       => <number>
00021         number of the current release
00022 
00023     'prev_release'  => <number>
00024         (optional) number of the previous release for reuse purposes (may coincide, may be 2 or more releases behind, etc)
00025 
00026     'registry_dbs'  => <list_of_dbconn_hashes>
00027         list of hashes with registry connection parameters (tried in succession).
00028 
00029     'reuse_this'    => <0|1>
00030         (optional) if defined, the code is skipped and this value is passed to the output
00031 
00032 =cut
00033 
00034 package Bio::EnsEMBL::Compara::RunnableDB::ProteinTrees::FromScratch::CheckGenomeReuse;
00035 
00036 use strict;
00037 use Bio::EnsEMBL::Registry;
00038 use Bio::EnsEMBL::DBLoader;
00039 use Bio::EnsEMBL::Compara::GenomeDB;
00040 
00041 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00042 
# run
#
# Decides whether the genome identified by the 'ncbi_taxon_id' parameter can be
# reused from the database described by the 'reuse_db' parameter, and dataflows
# the verdict.
#
# Parameters read via $self->param():
#   'reuse_this'    - if defined, this method returns immediately without
#                     looking anything up or dataflowing anything
#   'reuse_db'      - connection description handed to go_figure_compara_dba();
#                     if false, nothing can be reused
#   'ncbi_taxon_id' - taxon id used to look the genome up in the reuse database
#   'filename', 'species_name' - copied unchanged into the output hash
#
# Dataflow: a single hash with 'filename', 'ncbi_taxon_id', 'species_name' and
# 'reuse_this' (1 or 0), plus 'genome_db_id' (the dbID found in the reuse
# database) when the genome is reusable. It goes to branch 3 when reusable and
# to branch 2 otherwise.
#
# May die if go_figure_compara_dba() or get_GenomeDBAdaptor() dies; only the
# fetch_by_taxon_id() call is protected.
# NOTE(review): go_figure_compara_dba() is not defined in this file; presumably
# it is inherited from the BaseRunnable parent - confirm.
sub run {
    my $self = shift @_;

    # bypass the lookup in case 'reuse_this' has already been passed
    return if defined $self->param('reuse_this');

    my $reuse_db      = $self->param('reuse_db');
    my $ncbi_taxon_id = $self->param('ncbi_taxon_id');
    my $reuse_this    = 0;    # becomes the reused genome_db's dbID when one is found

    if (!$reuse_db) {
        $self->warning("reuse_db hash has not been set, so cannot reuse");
    } else {

        # Look the genome up by taxon id in the reuse database; failing to find it
        # is treated as a signal not to reuse:
        my $reuse_compara_dba       = $self->go_figure_compara_dba($reuse_db);    # may die if bad parameters
        my $reuse_genome_db_adaptor = $reuse_compara_dba->get_GenomeDBAdaptor();

        my $reuse_genome_db;
        my $fetch_error;
        eval {
            $reuse_genome_db = $reuse_genome_db_adaptor->fetch_by_taxon_id($ncbi_taxon_id);
            1;
        } or do {
            # Keep the reason instead of silently discarding it, so that a real
            # failure can be told apart from a plain "not found" in the log.
            $fetch_error = "$@" || 'unknown error';
        };

        if ($reuse_genome_db) {
            $reuse_this = $reuse_genome_db->dbID;
        } else {
            my $message = "Could not fetch genome_db object for taxon_id ".$ncbi_taxon_id." from reuse_db";
            if (defined $fetch_error) {
                $fetch_error =~ s/\s+\z//;    # drop the trailing newline of the die message
                $message .= ": $fetch_error";
            }
            $self->warning($message);
        }
    }

    # same base composition of the output, independent of the branch:
    my $output_hash = {
        'filename'           => $self->param('filename'),
        'ncbi_taxon_id'      => $ncbi_taxon_id,
        'species_name'       => $self->param('species_name'),
        'reuse_this'         => $reuse_this ? 1 : 0,
    };
    if ($reuse_this) {
        $output_hash->{'genome_db_id'} = $reuse_this;
    }

    # The flow is split between branches 2 and 3 depending on $reuse_this:
    $self->dataflow_output_id( $output_hash, $reuse_this ? 3 : 2);
}
00085 
00086 1;