Archive Ensembl HomeArchive Ensembl Home
StableIdMapper.pm
Go to the documentation of this file.
00001 =pod 
00002 
00003 =head1 NAME
00004 
00005     Bio::EnsEMBL::Compara::RunnableDB::StableIdMapper
00006 
00007 =cut
00008 
00009 =head1 SYNOPSIS
00010 
00011         # compute and store the stable_id mapping between trees of rel.63 and trees of rel.64:
00012 
00013     time standaloneJob.pl Bio::EnsEMBL::Compara::RunnableDB::StableIdMapper \
00014         -compara_db "mysql://ensadmin:${ENSADMIN_PSW}@compara3/mm14_compara_homology_64" \
00015         -master_db "mysql://ensadmin:${ENSADMIN_PSW}@compara1/sf5_ensembl_compara_master" \
00016         -prev_rel_db "mysql://ensro@compara1/lg4_ensembl_compara_63" -release 64 -type t
00017 
00018 =cut
00019 
00020 =head1 DESCRIPTION
00021 
00022 This RunnableDB computes and stores the stable_id mapping between releases for either ProteinTrees or Families.
00023 
00024 =cut
00025 
00026 =head1 CONTACT
00027 
00028 Contact anybody in Compara.
00029 
00030 =cut
00031 
00032 
00033 package Bio::EnsEMBL::Compara::RunnableDB::StableIdMapper;
00034 
00035 
00036 use strict;
00037 use warnings;
00038 
00039 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
00040 use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
00041 use Bio::EnsEMBL::Compara::StableId::Adaptor;
00042 use Bio::EnsEMBL::Compara::StableId::NamedClusterSetLink;
00043 use Bio::EnsEMBL::Hive::AnalysisJob;
00044 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00045 use Bio::EnsEMBL::Utils::Exception qw(throw);
00046 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
00047 use Scalar::Util qw(looks_like_number);
00048 
00049 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00050 
00051 
=head2 new_without_hive()

  Arg [DB_ADAPTOR]      : Bio::EnsEMBL::Compara::DBSQL::DBAdaptor pointing to
                          the current Compara DB (required)
  Arg [TYPE]            : The type of mapping to perform; only 'f' and 't'
                          are supported (required)
  Arg [RELEASE]         : The release of the current database (required)
  Arg [PREV_RELEASE]    : The release we are mapping IDs from (optional;
                          fetch_input() falls back to RELEASE - 1)
  Arg [PREV_RELEASE_DB] : DBAdaptor or HASH of the connection details to the
                          previous release database (required)
  Arg [MASTER_DB]       : DBAdaptor or HASH of the connection details to the
                          master database instance (required)
  Returntype  : An instance of this class
  Description : Builds an instance of this runnable to be used outside of a
                hive process. A bare Bio::EnsEMBL::Hive::AnalysisJob is
                attached so that $self->param() is usable.
  Exceptions  : If DB_ADAPTOR is not a Compara DBAdaptor, or if any of TYPE,
                RELEASE, PREV_RELEASE_DB or MASTER_DB is missing
  Status      : Beta

=cut

sub new_without_hive {
  my ($class, @args) = @_;

  my ($db_adaptor, $type, $release, $prev_release, $prev_release_db, $master_db)
    = rearrange(
        [qw(db_adaptor type release prev_release prev_release_db master_db)],
        @args,
      );

  # Validate the mandatory arguments, in the same order as they are documented.
  assert_ref($db_adaptor, 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor');
  $type            or throw('Need a -TYPE');
  $release         or throw('Need a -RELEASE');
  $prev_release_db or throw('Need a -PREV_RELEASE_DB');
  $master_db       or throw('Need a -MASTER_DB');

  my $self = bless {}, $class;

  # A stand-alone job object gives us somewhere to keep $self->param() values.
  $self->input_job(Bio::EnsEMBL::Hive::AnalysisJob->new());
  $self->compara_dba($db_adaptor);

  # Parameter names are the ones fetch_input()/run()/write_output() read back.
  my @job_params = (
    [ 'type',         $type            ],
    [ 'release',      $release         ],
    [ 'prev_release', $prev_release    ],
    [ 'prev_rel_db',  $prev_release_db ],
    [ 'master_db',    $master_db       ],
  );
  for my $pair (@job_params) {
    $self->param(@{$pair});
  }

  return $self;
}
00095 
00096 
=head2 run_without_hive()

  Returntype  : None
  Description : Runs the three stages of the hive process (fetch_input, run
                and write_output) in one continuous call.
  Exceptions  : Lots possible from bad identifier mappings
  Status      : Beta

=cut

sub run_without_hive {
  my $self = shift;

  # Same life cycle the hive would drive, executed back to back.
  for my $stage (qw(fetch_input run write_output)) {
    $self->$stage();
  }

  return;
}
00114 
00115 
=head2 fetch_input()

  Returntype  : None meaningful
  Description : Validates the job parameters and prepares the objects used by
                run() and write_output(). Does nothing (apart from a debug
                message) when 'prev_rel_db' is not set. Otherwise it calls
                fetch_ncs() for the previous and the current release, links
                the two results in a NamedClusterSetLink and stores the
                'adaptor', 'ncsl' and effective 'prev_release' back into the
                job parameters. 'prev_release' defaults to 'release' - 1.
  Exceptions  : If 'master_db', 'type' or 'release' is missing, or if
                'release' does not look like a number

=cut

sub fetch_input {
  my ($self) = @_;

  my $prev_rel_db = $self->param('prev_rel_db');
  unless ($prev_rel_db) {
    if ($self->debug()) {
      print q{Not running as 'prev_rel_db' not given in parameters}."\n";
    }
    return;
  }

  # Parameter checks; the order decides which complaint is raised first.
  throw("'master_db' is a required parameter")
    unless $self->param('master_db');

  my $type = $self->param('type')
    or throw("'type' is a required parameter, please set it in the input_id hashref to 'f' or 't'");

  my $curr_release = $self->param('release')
    or throw("'release' is a required numeric parameter, please set it in the input_id hashref");

  throw("'release' is a numeric parameter. Check your input")
    unless looks_like_number($curr_release);

  # Without an explicit previous release, assume the one just before the current.
  my $prev_release = $self->param('prev_release');
  $prev_release = $curr_release - 1 unless $prev_release;

  # $prev_rel_db is known to be set here (see the early return above).
  my $prev_rel_dbc = $self->go_figure_compara_dba($prev_rel_db)->dbc();

  my $stable_id_adaptor = Bio::EnsEMBL::Compara::StableId::Adaptor->new();
  my $from_ncs = $stable_id_adaptor->fetch_ncs($prev_release, $type, $prev_rel_dbc);
  my $to_ncs   = $stable_id_adaptor->fetch_ncs($curr_release, $type, $self->compara_dba->dbc());
  my $link     = Bio::EnsEMBL::Compara::StableId::NamedClusterSetLink->new(
    -FROM => $from_ncs,
    -TO   => $to_ncs,
  );

  $self->compara_dba()->dbc()->disconnect_when_inactive(1);

  $self->param('adaptor', $stable_id_adaptor);
  $self->param('ncsl', $link);
  $self->param('prev_release', $prev_release); #replace it with whatever it is now
}
00143 
00144 
=head2 run()

  Returntype  : None meaningful
  Description : Takes the 'ncsl' link object prepared by fetch_input(), asks
                it for its maximum_name_reuse() map and applies that map to
                the link's 'to' side. Does nothing when 'prev_rel_db' is not
                set.
  Exceptions  : Whatever the mapping calls raise

=cut

sub run {
  my ($self) = @_;

  return if ! $self->param('prev_rel_db'); #bail out early

  # Only the link object is needed here; 'type', 'release' and 'prev_release'
  # were consumed by fetch_input() when the link was built.
  my $ncsl    = $self->param('ncsl');
  my $postmap = $ncsl->maximum_name_reuse();
  $ncsl->to->apply_map($postmap);
}
00158 
00159 
=head2 write_output()

  Returntype  : None meaningful
  Description : Stores the mapping computed by run(): store_map() is called
                for the link's 'to' side against the current Compara
                database, then store_history() for the whole link, passing
                the time storing started and the master database connection.
                Does nothing when 'prev_rel_db' is not set.
  Exceptions  : If either store call fails; the original error is appended to
                the message

=cut

sub write_output {
  my ($self) = @_;

  return if ! $self->param('prev_rel_db'); #bail out early

  my $adaptor   = $self->param('adaptor');
  my $ncsl      = $self->param('ncsl');
  my $master_db = $self->param('master_db');

  # Resolved outside the eval, so failures here surface with their own message.
  my $master_dbc = $master_db && $self->go_figure_compara_dba($master_db)->dbc();
  my $time_when_started_storing = time();

  # Rely on the block's own return value rather than on $@ being non-empty:
  # $@ can be reset while the stack unwinds (e.g. by a destructor that uses
  # eval), which would make a failed store look like a success.
  my $stored_ok = eval {
    $adaptor->store_map($ncsl->to, $self->compara_dba()->dbc());
    $adaptor->store_history($ncsl, $self->compara_dba()->dbc(), $time_when_started_storing, $master_dbc);
    1;
  };
  if (! $stored_ok) {
    my $error = $@ || 'unknown error';
    throw("Detected error during store. Check your database settings are correct for the master database (read/write): $error");
  }
}
00179 
00180 1;
00181