Archive Ensembl HomeArchive Ensembl Home
BaseObject.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::IdMapping::BaseObject - base object for IdMapping objects
00024 
00025 =head1 SYNOPSIS
00026 
00027   # this object isn't instantiated directly but rather extended
00028   use Bio::EnsEMBL::IdMapping::BaseObject;
00029   our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
00030 
00031 =head1 DESCRIPTION
00032 
00033 This is the base object for some of the objects used in the IdMapping
00034 application. An object that extends BaseObject will have a ConfParser,
00035 Logger and Cache object. BaseObject also implements some useful utility
00036 functions related to file and db access.
00037 
00038 This isn't very clean OO design but it's efficient and easy to use...
00039 
00040 =head1 METHODS
00041 
00042   new
00043   get_filehandle
00044   file_exists
00045   fetch_value_from_db
00046   dump_table_to_file
00047   upload_file_into_table
00048   logger
00049   conf
00050   cache
00051 
00052 =cut
00053 
00054 
00055 package Bio::EnsEMBL::IdMapping::BaseObject;
00056 
00057 use strict;
00058 use warnings;
00059 no warnings 'uninitialized';
00060 
00061 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00062 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00063 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
00064 
00065 
00066 =head2 new
00067 
00068   Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object
00069   Arg [CONF]  : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
00070   Arg [CACHE] : Bio::EnsEMBL::IdMapping::Cache $cache - a cache object
00071   Example     : my $object = Bio::EnsEMBL::IdMapping::BaseObjectSubclass->new(
00072                   -LOGGER => $logger,
00073                   -CONF   => $conf,
00074                   -CACHE  => $cache
00075                 );
00076   Description : Constructor
00077   Return type : implementing subclass type
00078   Exceptions  : thrown on wrong or missing arguments
00079   Caller      : general
00080   Status      : At Risk
00081               : under development
00082 
00083 =cut
00084 
# Constructor. Takes the named arguments -LOGGER, -CONF and -CACHE, checks
# that each one is an object of the expected class, and stores them on the
# new instance through the logger(), conf() and cache() accessors.
# Throws if any of the three is missing or of the wrong type.
sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my ($logger, $conf, $cache) = rearrange(['LOGGER', 'CONF', 'CACHE'], @_);

  # blessed() is used rather than ref() so that a plain (unblessed)
  # reference is rejected with throw() below, instead of perl dying with
  # "Can't call method "isa" on unblessed reference".
  require Scalar::Util;

  unless (Scalar::Util::blessed($logger) and
          $logger->isa('Bio::EnsEMBL::Utils::Logger')) {
    throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
  }

  unless (Scalar::Util::blessed($conf) and
          $conf->isa('Bio::EnsEMBL::Utils::ConfParser')) {
    throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
  }

  unless (Scalar::Util::blessed($cache) and
          $cache->isa('Bio::EnsEMBL::IdMapping::Cache')) {
    throw("You must provide a cache as a Bio::EnsEMBL::IdMapping::Cache object.");
  }

  my $self = {};
  bless ($self, $class);

  # initialise
  $self->logger($logger);
  $self->conf($conf);
  $self->cache($cache);

  return $self;
}
00116 
00117 
00118 =head2 get_filehandle 
00119 
00120   Arg[1]      : String $filename - filename for filehandle
00121   Arg[2]      : String $path_append - append subdirectory name to basedir
00122   Arg[3]      : String $mode - filehandle mode (<|>|>>)
00123   Example     : my $fh = $object->get_filehandle('mapping_stats.txt', 'stats',
00124                   '>');
00125                 print $fh "Stats:\n";
00126   Description : Returns a filehandle to a file for reading or writing. The file
00127                 is qualified with the basedir defined in the configuration and
00128                 an optional subdirectory name.
00129   Return type : filehandle
00130   Exceptions  : thrown on missing filename
00131   Caller      : general
00132   Status      : At Risk
00133               : under development
00134 
00135 =cut
00136 
# Opens "<dir>/$filename" and returns the lexical filehandle, where <dir> is
# the configured 'basedir', combined with $path_append via path_append()
# when that argument is defined. $mode is passed to open() as the mode
# argument; a false/missing $mode means '>'.
# Throws when $filename is undefined or when open() fails.
sub get_filehandle {
  my ($self, $filename, $path_append, $mode) = @_;

  unless (defined($filename)) {
    throw("Need a filename for this filehandle.");
  }

  my $path = $self->conf->param('basedir');
  if (defined($path_append)) {
    $path = path_append($path, $path_append);
  }

  # default to (over)writing when no mode was requested
  $mode = '>' unless ($mode);

  my $fh;
  unless (open($fh, $mode, "$path/$filename")) {
    throw("Unable to open $path/$filename: $!");
  }

  return $fh;
}
00155 
00156 
00157 =head2 file_exists
00158 
00159   Arg[1]      : String $filename - filename to test
00160   Arg[2]      : String $path_append - append subdirectory name to basedir
00161   Example     : unless ($object->file_exists('gene_mappings.ser', 'mapping')) {
00162                   $object->do_gene_mapping;
00163                 }
00164   Description : Tests if a file exists and has non-zero size.
00165   Return type : Boolean
00166   Exceptions  : none
00167   Caller      : general
00168   Status      : At Risk
00169               : under development
00170 
00171 =cut
00172 
# Returns the result of perl's -s test on "<dir>/$filename": the file size
# when the file exists and is non-empty, a false value otherwise. <dir> is
# the configured 'basedir', combined with $path_append via path_append()
# when that argument is defined.
sub file_exists {
  my ($self, $filename, $path_append) = @_;

  my $path = $self->conf->param('basedir');
  if (defined($path_append)) {
    $path = path_append($path, $path_append);
  }

  my $size = -s "$path/$filename";

  return $size;
}
00183 
00184 
00185 =head2 fetch_value_from_db 
00186 
00187   Arg[1]      : DBI::db $dbh - a DBI database handle
00188   Arg[2]      : String $sql - SQL statement to execute
00189   Example     : my $num_genes = $object->fetch_value_from_db($dbh,
00190                   'SELECT count(*) FROM gene');
00191   Description : Executes an SQL statement on a db handle and returns the first
00192                 column of the first row returned. Useful for queries returning a
00193                 single value, like table counts.
00194   Return type : Return type of SQL statement
00195   Exceptions  : thrown on wrong or missing arguments
00196   Caller      : general
00197   Status      : At Risk
00198               : under development
00199 
00200 =cut
00201 
# Prepares and executes $sql on $dbh and returns the first column of the
# first row fetched (undef when no row comes back).
# Throws when $dbh is false or not a DBI::db, or when $sql is false.
sub fetch_value_from_db {
  my ($self, $dbh, $sql) = @_;

  if (!$dbh or !$dbh->isa('DBI::db')) {
    throw("Need a db handle.");
  }
  if (!$sql) {
    throw("Need an SQL query to execute.");
  }

  my $sth = $dbh->prepare($sql);
  $sth->execute;

  # only the first column of the first row is of interest
  my @first_row = $sth->fetchrow_array;

  return $first_row[0];
}
00216 
00217 
00218 =head2 dump_table_to_file 
00219 
00220   Arg[1]      : String $dbtype - db type (source|target)
00221   Arg[2]      : String $table - name of table to dump
00222   Arg[3]      : String $filename - name of dump file
00223   Arg[4]      : Boolean $check_existing - turn on test for existing dump
00224   Example     : my $rows_dumped = $object->dump_table_to_file('source',
00225                   'stable_id_event', 'stable_id_event_existing.txt');
00226   Description : Dumps the contents of a db table to a tab-delimited file. The
00227                 dump file will be written to a subdirectory called 'tables'
00228                 under the basedir from your configuration.
00229   Return type : Int - the number of rows dumped
00230   Exceptions  : thrown on wrong or missing arguments
00231   Caller      : general
00232   Status      : At Risk
00233               : under development
00234 
00235 =cut
00236 
# Dumps every row of $table from the 'source' or 'target' database to a
# tab-delimited file named $filename, opened through
# get_filehandle($filename, 'tables'). NULL column values are written as \N.
#
# Returns the number of rows written, or 0 without touching the database
# when $check_existing is true and file_exists() reports an existing dump.
# Throws on a bad $dbtype, a missing $table or $filename, or when the dump
# file cannot be flushed/closed.
sub dump_table_to_file {
  my $self = shift;
  my $dbtype = shift;
  my $table = shift;
  my $filename = shift;
  my $check_existing = shift;

  # argument check
  unless (($dbtype eq 'source') or ($dbtype eq 'target')) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.") unless ($filename);

  # conditionally check if table was already dumped
  if ($check_existing and $self->file_exists($filename, 'tables')) {
    $self->logger->info("$filename exists, won't dump again.\n");
    return 0;
  }

  my $fh = $self->get_filehandle($filename, 'tables');

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;
  my $sth = $dbh->prepare("SELECT * FROM $table");
  $sth->execute;

  my $i = 0;

  while (my @row = $sth->fetchrow_array) {
    $i++;

    # use '\N' for NULL values
    print {$fh} join("\t", map { defined($_) ? $_ : '\N' } @row), "\n";
  }

  $sth->finish;

  # Buffered write errors (e.g. a full disk) only surface when the handle
  # is closed, so check close() rather than report a truncated dump as a
  # success.
  close($fh) or throw("Unable to close dump file $filename: $!");

  return $i;
}
00282 
00283 
00284 =head2 upload_file_into_table
00285 
00286   Arg[1]      : String $dbtype - db type (source|target)
00287   Arg[2]      : String $table - name of table to upload the data to
00288   Arg[3]      : String $filename - name of dump file
00289   Arg[4]      : Boolean $no_check_empty - don't check if table is empty
00290   Example     : my $rows_uploaded = $object->upload_file_into_table('target',
00291                   'stable_id_event', 'stable_id_event_new.txt');
00292   Description : Uploads a tab-delimited data file into a db table. The data file
00293                 will be taken from a subdirectory 'tables' under your configured
00294                 basedir. If the db table isn't empty and $no_check_empty isn't
00295                 set, no data is uploaded (and a warning is issued).
00296   Return type : Int - the number of rows uploaded
00297   Exceptions  : thrown on wrong or missing arguments
00298   Caller      : general
00299   Status      : At Risk
00300               : under development
00301 
00302 =cut
00303 
# Loads the tab-delimited file <basedir>/tables/$filename into $table of the
# 'source' or 'target' database using LOAD DATA LOCAL INFILE.
#
# When $table matches /^([^_]+)_stable_id/ the data is loaded into a
# temporary table first and then copied onto the stable_id, version,
# created_date and modified_date columns of the base table (the part of the
# name before "_stable_id").
#
# Unless $no_check_empty is true, nothing is uploaded when the destination
# already holds data (rows, or non-NULL stable IDs for a stable_id table).
#
# Returns the number of rows loaded from the file as reported by DBI, 0 when
# nothing was uploaded, and nothing at all when 'dry_run' is configured.
# Throws on a bad $dbtype or a missing $table or $filename.
sub upload_file_into_table {
  my $self           = shift;
  my $dbtype         = shift;
  my $table          = shift;
  my $filename       = shift;
  my $no_check_empty = shift;

  # argument check
  unless ( ( $dbtype eq 'source' ) or ( $dbtype eq 'target' ) ) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.")   unless ($filename);

  # sanity check for dry run
  if ( $self->conf->param('dry_run') ) {
    $self->logger->warning(
                       "dry_run - skipping db upload for $filename.\n");
    return;
  }

  my $file =
    join( '/', $self->conf->param('basedir'), 'tables', $filename );
  my $r = 0;

  unless ( -s $file ) {
    $self->logger->warning( "No data found in file $filename.\n", 1 );
    return $r;
  }

  $self->logger->debug( "$file -> $table\n", 1 );

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;

  my $idtable = 0;
  if ( $table =~ /^([^_]+)_stable_id/ ) {
    # This is a stable_id table we're working with; from here on $table
    # names the base table that carries the stable ID columns.
    $idtable = 1;
    $table   = $1;
  }

  # check table is empty
  unless ($no_check_empty) {
    my $sql;
    if ($idtable) {
      $sql =
        qq(SELECT count(*) FROM $table WHERE stable_id IS NOT NULL);
    }
    else {
      $sql = qq(SELECT count(*) FROM $table);
    }
    my $sth = $dbh->prepare($sql);
    $sth->execute;
    my ($c) = $sth->fetchrow_array;
    $sth->finish;

    if ( $c > 0 ) {
      if ($idtable) {
        $self->logger->warning(
                             "Table $table contains $c stable IDs.\n",
                             1 );
      }
      else {
        $self->logger->warning(
                        "Table $table not empty: found $c entries.\n",
                        1 );
      }
      $self->logger->info( "Data not uploaded!\n", 1 );
      return $r;
    }
  } ## end unless ($no_check_empty)

  # Quote the path as an SQL string literal so that a quote or backslash
  # in basedir/filename cannot break the LOAD DATA statement.
  my $quoted_file = $dbh->quote($file);
  my $loaded;

  # now upload the data
  if ($idtable) {
    # Create a temporary table (named after this process' PID), upload
    # the data into it, and then update the main table.
    my $tmp_table = 'stable_id_' . $$;

    $dbh->do(
      qq( CREATE TABLE $tmp_table (  object_id INTEGER UNSIGNED,
                                     stable_id VARCHAR(255),
                                     version SMALLINT UNSIGNED,
                                     created_date DATETIME,
                                     modified_date DATETIME,
                                     PRIMARY KEY(object_id) ) )
    );

    $loaded = $dbh->do(
      qq(LOAD DATA LOCAL INFILE $quoted_file INTO TABLE $tmp_table));

    $dbh->do(
      qq(
    UPDATE $table, $tmp_table
    SET $table.stable_id=$tmp_table.stable_id,
        $table.version=$tmp_table.version,
        $table.created_date=$tmp_table.created_date,
        $table.modified_date=$tmp_table.modified_date
    WHERE $table.${table}_id = $tmp_table.object_id )
    );

    $dbh->do(qq(DROP TABLE $tmp_table));
  } ## end if ($idtable)
  else {
    $loaded =
      $dbh->do(qq(LOAD DATA LOCAL INFILE $quoted_file INTO TABLE $table));
  }
  $dbh->do(qq(OPTIMIZE TABLE $table));

  # DBI's do() yields the affected row count, '0E0' for zero rows, -1 when
  # the count is unknown and undef on error; report a plain non-negative
  # integer to the caller.
  $r = ( defined($loaded) and $loaded > 0 ) ? $loaded + 0 : 0;

  return $r;
} ## end sub upload_file_into_table
00414 
00415 
00416 =head2 logger
00417 
00418   Arg[1]      : (optional) Bio::EnsEMBL::Utils::Logger - the logger to set
00419   Example     : $object->logger->info("Starting ID mapping.\n");
00420   Description : Getter/setter for logger object
00421   Return type : Bio::EnsEMBL::Utils::Logger
00422   Exceptions  : none
00423   Caller      : constructor
00424   Status      : At Risk
00425               : under development
00426 
00427 =cut
00428 
# Getter/setter for the logger object kept under the '_logger' key.
# Any argument (including an explicit undef) replaces the stored value;
# the current value is always returned.
sub logger {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_logger'} = $new_value[0];
  }

  return $self->{'_logger'};
}
00434 
00435 
00436 =head2 conf
00437 
00438   Arg[1]      : (optional) Bio::EnsEMBL::Utils::ConfParser - the configuration
00439                 to set
00440   Example     : my $basedir = $object->conf->param('basedir');
00441   Description : Getter/setter for configuration object
00442   Return type : Bio::EnsEMBL::Utils::ConfParser
00443   Exceptions  : none
00444   Caller      : constructor
00445   Status      : At Risk
00446               : under development
00447 
00448 =cut
00449 
# Getter/setter for the configuration object kept under the '_conf' key.
# Any argument (including an explicit undef) replaces the stored value;
# the current value is always returned.
sub conf {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_conf'} = $new_value[0];
  }

  return $self->{'_conf'};
}
00455 
00456 
00457 =head2 cache
00458 
00459   Arg[1]      : (optional) Bio::EnsEMBL::IdMapping::Cache - the cache to set
00460   Example     : $object->cache->read_from_file('source');
00461   Description : Getter/setter for cache object
00462   Return type : Bio::EnsEMBL::IdMapping::Cache
00463   Exceptions  : none
00464   Caller      : constructor
00465   Status      : At Risk
00466               : under development
00467 
00468 =cut
00469 
# Getter/setter for the cache object kept under the '_cache' key.
# Any argument (including an explicit undef) replaces the stored value;
# the current value is always returned.
sub cache {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_cache'} = $new_value[0];
  }

  return $self->{'_cache'};
}
00475 
00476 
00477 1;
00478