Archive Ensembl HomeArchive Ensembl Home
ProjectOntologyXref.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 
00005 =pod
00006 
00007 =head1 NAME
00008 
00009 Bio::EnsEMBL::Compara::Production::Projection::RunnableDB::ProjectOntologyXref
00010 
00011 =head1 DESCRIPTION
00012 
00013 This object serves two functions. In the first instance it is a 
00014 RunnableDB instance to be used in a Hive pipeline and therefore 
00015 inherits from Hive's Process object. A second set of methods is provided 
00016 with the suffix C<without_hive> which allows you to use this object 
00017 outside of a Hive pipeline.
00018 
00019 The Runnable is here to bring together a ProjectionEngine with the
00020 GenomeDB instances it will work with and have it interact with a 
00021 ProjectionEngine writer (which can be a database or a file). See the
00022 C<fetch_input()> method for information on the parameters the module
00023 responds to, and C<new_without_hive()> for information on how to use
00024 the module outside of hive.
00025 
00026 =head1 AUTHOR
00027 
00028 Andy Yates (ayatesatebiacuk)
00029 
00030 =head1 CONTACT
00031 
00032 This module is part of the EnsEMBL project (http://www.ensembl.org)
00033 
00034 Questions can be posted to the dev mailing list: dev@ensembl.org
00035 
00036 =cut
00037 
00038 package Bio::EnsEMBL::Compara::Production::Projection::RunnableDB::ProjectOntologyXref;
00039 
00040 use strict;
00041 use warnings;
00042 
00043 use base qw(
00044   Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable
00045 );
00046 
00047 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00048 use Bio::EnsEMBL::Utils::Exception qw(throw);
00049 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
00050 
00051 use File::Spec;
00052 
00053 use Bio::EnsEMBL::Hive::URLFactory;
00054 
00055 use Bio::EnsEMBL::Compara::Production::Projection::RunnableDB::RunnableLogger;
00056 use Bio::EnsEMBL::Compara::Production::Projection::Writer::ProjectedDBEntryWriter;
00057 use Bio::EnsEMBL::Compara::Production::Projection::Writer::ProjectedDisplayXrefWriter;
00058 use Bio::EnsEMBL::Compara::Production::Projection::Writer::ProjectedFileWriter;
00059 use Bio::EnsEMBL::Compara::Production::Projection::Writer::MultipleWriter;
00060 
00061 
00062 #--- Non-hive methods
=head2 new_without_hive()

  Arg [PROJECTION_ENGINE]       : (ProjectionEngine) The projection engine to use to transfer terms
  Arg [TARGET_GENOME_DB]        : (GenomeDB) GenomeDB to project terms to
  Arg [WRITE_DBA]               : (DBAdaptor) Required if not given -FILE; used to
                                  write the projections to a core database
  Arg [FILE]                    : (String) Required if not given -WRITE_DBA; location of
                                  pipeline output; if given a directory it will generate a file name
  Arg [DEBUG]                   : (Scalar) Optional; value handed on to debug()

  Example    : See synopsis
  Description: Non-hive version of the object construction to be used with scripts
  Returntype : Bio::EnsEMBL::Compara::Production::Projection::RunnableDB::ProjectOntologyXref
  Exceptions : If PROJECTION_ENGINE or TARGET_GENOME_DB was not given or was not
               an object of the expected type. Also if neither FILE nor
               WRITE_DBA was given
  Caller     : general

=cut

sub new_without_hive {
  my ($class, @params) = @_;

  my $self = bless {}, $class;

  # Nothing else in this file loads AnalysisJob, so load it here rather than
  # relying on another module having pulled it in already.
  require Bio::EnsEMBL::Hive::AnalysisJob;

  # The attribute accessors below all go through param(); presumably that is
  # backed by the input job, hence an empty job is attached first -
  # TODO confirm against Bio::EnsEMBL::Hive::Process.
  my $job = Bio::EnsEMBL::Hive::AnalysisJob->new();
  $self->input_job($job);

  my ($projection_engine, $target_genome_db, $write_dba, $file, $debug) = rearrange(
    [qw(projection_engine target_genome_db write_dba file debug)],
  @params);

  throw('-PROJECTION_ENGINE was not defined ') unless defined $projection_engine;
  $self->projection_engine($projection_engine);
  throw('-TARGET_GENOME_DB was not defined ') unless defined $target_genome_db;
  $self->target_genome_db($target_genome_db);
  throw('Need a -FILE or -WRITE_DBA parameter') if ! defined $write_dba && ! defined $file;
  $self->write_dba($write_dba) if defined $write_dba;
  $self->file($file) if defined $file;

  # -DEBUG used to be parsed and then dropped. debug() is only seen used as a
  # getter in this file; it is assumed to accept a value as well - TODO confirm.
  $self->debug($debug) if defined $debug;

  return $self;
}
00101 
=head2 run_without_hive()

Convenience wrapper for use outside of a Hive pipeline: calls C<run()> and
then C<write_output()> on this object. Returns nothing.

=cut

sub run_without_hive {
  my $self = shift;

  # Project first, then flush the projections to the configured writer(s)
  $self->run();
  $self->write_output();

  return;
}
00114 
=head2 fetch_input()

Builds the projection engine, resolves the target GenomeDB and records where
the output should go. Expects to see the following params:

=over 8

=item source_genome_db_id - Required GenomeDB ID

=item target_genome_db_id - Required GenomeDB ID

=item projection_engine_class - Required String which is the package of the engine to use

=item method_link - Optional but should be the method_link class of the types of Homologies to get

=item write_to_db - Boolean which if on will start writing results to a core DB

=item core_db - String which should be a URL of the core DB to write to B<IF> the one available via the Registry is read-only

=item write_to_file - Boolean which if on will start writing results to a file

=item file - String indicating a directory to write to (auto generated file name) or a target file name. We do not automatically create directories

=item engine_params - Optional HashRef of extra constructor arguments for the engine. These are merged into the arguments built here; on a clash the value from engine_params is used

=item source - The source of the DBEntries to use; specify the source_name as used in member

=back

Throws if a required param is missing, if either GenomeDB ID cannot be
resolved to an object, or if write_to_file is on without a file param.
Returns 1.

=cut

sub fetch_input {
  my ($self) = @_;

  my $compara_dba = $self->get_compara_dba();
  my $gdb_a = $compara_dba->get_GenomeDBAdaptor();

  foreach my $required (qw(source_genome_db_id target_genome_db_id projection_engine_class)) {
    throw("No ${required} given in input") if ! $self->param($required);
  }

  #Building the engine
  my $source_id = $self->param('source_genome_db_id');
  my $source_gdb = $gdb_a->fetch_by_dbID($source_id);
  throw("No GenomeDB found for source_genome_db_id ${source_id}") if ! defined $source_gdb;
  my $log = Bio::EnsEMBL::Compara::Production::Projection::RunnableDB::RunnableLogger->new(-DEBUG => $self->debug());

  my $params = { -GENOME_DB => $source_gdb, -DBA => $compara_dba, -LOG => $log };
  $params->{-METHOD_LINK} = $self->param('method_link') if $self->param('method_link');
  $params->{-SOURCE} = $self->param('source') if $self->param('source');

  # Merge rather than assign: a plain assignment would throw away the
  # -GENOME_DB, -DBA and -LOG arguments built above. engine_params comes
  # last in the list so its values win on a key clash.
  my $engine_params = $self->param('engine_params');
  %{$params} = (%{$params}, %{$engine_params}) if $engine_params;

  my $engine = $self->_build_engine($params);
  $self->projection_engine($engine);

  #Working with target GDB
  my $target_id = $self->param('target_genome_db_id');
  my $target_genome_db = $gdb_a->fetch_by_dbID($target_id);
  # target_genome_db() ignores undef, so fail here instead of much later
  throw("No GenomeDB found for target_genome_db_id ${target_id}") if ! defined $target_genome_db;
  $self->target_genome_db($target_genome_db);

  #Setting up the outputs
  if($self->param('write_to_db')) {
    my $core_db = $self->param('core_db');
    # An explicit URL takes priority over the adaptor attached to the GenomeDB
    my $adaptor = ($core_db)
                ? Bio::EnsEMBL::Hive::URLFactory->fetch($core_db)
                : $target_genome_db->db_adaptor();
    $self->write_dba($adaptor);
  }
  if($self->param('write_to_file')) {
    my $file = $self->param('file');
    throw('No file param given in input') unless $file;
    $self->file($file);
  }

  return 1;
}
00187 
=head2 run()

Asks the projection engine to project onto the target GenomeDB and stores
whatever it returns via C<projections()>. Returns 1.

=cut

sub run {
  my $self = shift;
  my $projections = $self->projection_engine()->project($self->target_genome_db());
  $self->projections($projections);
  return 1;
}
00201 
=head2 write_output()

Fetches the writer from C<_writer()> and calls C<write()> on it, which sends
the stored projections to the configured outputs. Returns 1.

=cut

sub write_output {
  my $self = shift;
  my $writer = $self->_writer();
  $writer->write();
  return 1;
}
00214 
00215 #### Attributes
00216 
=head2 projection_engine()

Getter/setter for the engine used to transfer terms. When given a defined
value it must be a
Bio::EnsEMBL::Compara::Production::Projection::ProjectionEngine (checked
with assert_ref) and is stored as the C<projection_engine> param. Always
returns the currently stored engine.

=cut

sub projection_engine {
  my ($self, $projection_engine) = @_;

  # Pure getter call: nothing to validate or store
  return $self->param('projection_engine') unless defined $projection_engine;

  assert_ref($projection_engine, 'Bio::EnsEMBL::Compara::Production::Projection::ProjectionEngine');
  $self->param('projection_engine', $projection_engine);
  return $self->param('projection_engine');
}
00231 
=head2 target_genome_db()

Getter/setter for the GenomeDB instance terms are projected to. When given a
defined value it must be a Bio::EnsEMBL::Compara::GenomeDB (checked with
assert_ref); it is stored both directly on the object hash and as the
C<target_genome_db> param. Always returns the value held in the param.

=cut

sub target_genome_db {
  my ($self, $target_genome_db) = @_;

  # Pure getter call: nothing to validate or store
  return $self->param('target_genome_db') unless defined $target_genome_db;

  assert_ref($target_genome_db, 'Bio::EnsEMBL::Compara::GenomeDB');
  # Kept in two places, exactly as before: the hash slot and the param
  $self->{target_genome_db} = $target_genome_db;
  $self->param('target_genome_db', $target_genome_db);
  return $self->param('target_genome_db');
}
00247 
=head2 projections()

Getter/setter for the projections we have projected; an ArrayRef of
Projection objects. A defined argument is checked with assert_ref to be an
ARRAY reference before being stored as the C<projections> param. Always
returns the currently stored value.

=cut

sub projections {
  my ($self, $projections) = @_;
  if(defined $projections) {
    # assert_ref is used as the gate here, so a non-ARRAY value is never stored
    if(assert_ref($projections, 'ARRAY')) {
      $self->param('projections', $projections);
    }
  }
  return $self->param('projections');
}
00261 
=head2 _writer()

Returns the writer instance depending on what was given during construction;
it is built on first use and cached in the C<writer> param.

If C<write_dba()> is set a database writer is created: a
ProjectedDisplayXrefWriter when the engine is a DisplayXrefProjectionEngine,
otherwise a ProjectedDBEntryWriter. If C<file()> is set a ProjectedFileWriter
is created for C<_target_filename()>. More than one writer is wrapped in a
MultipleWriter.

Throws if neither a write DBAdaptor nor a file has been configured.

=cut

sub _writer {
  my ($self) = @_;

  my $cached = $self->param('writer');
  return $cached if defined $cached;

  my $projections = $self->projections();
  my $writer_base = 'Bio::EnsEMBL::Compara::Production::Projection::Writer';
  my $writers = [];

  if($self->write_dba()) {
    # Display xref projections need their own writer; all others are DBEntries
    my $is_display_xref = check_ref(
      $self->projection_engine(),
      'Bio::EnsEMBL::Compara::Production::Projection::DisplayXrefProjectionEngine'
    );
    my $db_writer_class = $writer_base . ($is_display_xref ? '::ProjectedDisplayXrefWriter' : '::ProjectedDBEntryWriter');
    push(@{$writers}, $db_writer_class->new(
      -PROJECTIONS  => $projections,
      -DBA          => $self->write_dba()
    ));
  }
  if($self->file()) {
    push(@{$writers}, Bio::EnsEMBL::Compara::Production::Projection::Writer::ProjectedFileWriter->new(
      -PROJECTIONS  => $projections,
      -FILE         => $self->_target_filename()
    ));
  }

  # Without this the caller would only see a "method on undefined value" error
  throw('No writer could be built; need a write DBAdaptor and/or a file') if ! @{$writers};

  my $writer;
  if(scalar(@{$writers}) > 1) {
    $writer = Bio::EnsEMBL::Compara::Production::Projection::Writer::MultipleWriter->new(
      -WRITERS      => $writers,
      -PROJECTIONS  => $projections
    );
  }
  else {
    $writer = $writers->[0];
  }

  # Must be stored through param(): that is where the lookups above and below
  # read from. Putting the MultipleWriter on the object hash made this method
  # return undef whenever both outputs were configured.
  $self->param('writer', $writer);

  return $self->param('writer');
}
00308 
=head2 write_dba()

Getter/setter for the DBAdaptor instance which can write to a core database;
assumed to be the same as the target GenomeDB. A defined argument is stored
as the C<write_dba> param; the stored value is always returned.

=cut

sub write_dba {
  my ($self, $write_dba) = @_;
  if(defined $write_dba) {
    $self->param('write_dba', $write_dba);
  }
  return $self->param('write_dba');
}
00321 
=head2 file()

Getter/setter for the file or directory to write to. A defined argument is
stored as the C<file> param; the stored value is always returned.

=cut

sub file {
  my ($self, $file) = @_;
  if(defined $file) {
    $self->param('file', $file);
  }
  return $self->param('file');
}
00333 
=head2 _target_filename()

Resolves C<file()> into the path to write to. If it is an existing directory
an automatically generated name (sourcename_to_targetname.txt) inside that
directory is returned, built from the names of the engine's GenomeDB and the
target GenomeDB. Anything else is returned unchanged.

=cut

sub _target_filename {
  my ($self) = @_;
  my $location = $self->file();

  # Not a directory: treat it as the file name itself
  return $location unless -d $location;

  my $source_gdb = $self->projection_engine()->genome_db();
  my $target_gdb = $self->target_genome_db();
  my $basename = sprintf('%s_to_%s.txt', $source_gdb->name(), $target_gdb->name());
  return File::Spec->catfile($location, $basename);
}
00354 
=head2 _build_engine()

Loads the package named by the C<projection_engine_class> param and returns
a new instance of it, constructed with the given HashRef of arguments
flattened into a list.

Throws if the param is empty, if it is not a syntactically valid package
name, or if the package cannot be loaded.

=cut

sub _build_engine {
  my ($self, $args) = @_;
  my $mod = $self->param('projection_engine_class');
  throw('No projection_engine_class given in input') if ! $mod;

  # The name is interpolated into a string eval below, so refuse anything
  # that is not a plain package name before it can be run as code.
  if($mod !~ /\A[A-Za-z_]\w*(?:::\w+)*\z/) {
    throw("projection_engine_class '${mod}' is not a valid package name");
  }

  # The trailing "1" lets the eval's own return value signal success, which
  # does not depend on $@ surviving until it is inspected.
  eval "require ${mod}; 1" or throw("Cannot bring in the module ${mod}: $@");

  my $engine = $mod->new(%{$args});
  return $engine;
}
00363 
00364 1;