Archive Ensembl HomeArchive Ensembl Home
SqlHelper.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::Utils::SqlHelper
00024 
00025 =head1 VERSION
00026 
00027 $Revision: 1.22 $
00028 
00029 =head1 SYNOPSIS
00030 
00031   use Bio::EnsEMBL::Utils::SqlHelper;
00032 
00033   my $helper =
00034     Bio::EnsEMBL::Utils::SqlHelper->new( -DB_CONNECTION => $dbc );
00035 
00036   my $arr_ref = $helper->execute(
00037     -SQL      => 'select name, age from tab where col =?',
00038     -CALLBACK => sub {
00039       my @row = @{ shift @_ };
00040       return { name => $row[0], age => $row[1] };
00041     },
00042     -PARAMS => ['A'] );
00043 
00044   use Data::Dumper;
00045   print Dumper($arr_ref), "\n";
00046   # Prints out [name=>'name', age=>1] maybe ....
00047 
00048 
00049   # For transactional work; only works if your MySQL table
00050   # engine/database supports transactional work (such as InnoDB)
00051 
00052   $helper->transaction(
00053     -CALLBACK => sub {
00054       if ( $helper->execute_single_result(
00055                                       -SQL => 'select count(*) from tab'
00056            ) )
00057       {
00058         return $helper->execute_update('delete from tab');
00059       } else {
00060         return
00061           $helper->batch( -SQL  => 'insert into tab (?,?)',
00062                           -DATA => [ [ 1, 2 ], [ 1, 3 ], [ 1, 4 ] ] );
00063       }
00064     } );
00065 
00066 =head1 DESCRIPTION
00067 
00068 Easier database interaction
00069 
00070 =head1 METHODS
00071 
00072 See subrotuines.
00073 
00074 =cut
00075 
00076 package Bio::EnsEMBL::Utils::SqlHelper;
00077 
00078 use warnings;
00079 use strict;
00080 
00081 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00082 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
00083 use Bio::EnsEMBL::Utils::Exception qw(throw);
00084 use Bio::EnsEMBL::Utils::Iterator;
00085 use English qw( -no_match_vars ); #Used for $PROCESS_ID
00086 use Scalar::Util qw(weaken); #Used to not hold a strong ref to DBConnection
00087 
00088 =pod
00089 
00090 =head2 new()
00091 
00092   Arg [DB_CONNECTION] : Bio::EnsEMBL::DBSQL::DBConnection $db_connection
00093   Returntype          : Instance of helper
00094   Exceptions          : If the object given as a DBConnection is not one or it
00095                         was undefined
00096   Status              : Stable
00097 
00098 Creates a new instance of this object.
00099 
00100   my $dba = get_dba('mydb');    # New DBAdaptor from somewhere
00101   my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(
00102                                         -DB_CONNECTION => $dba->dbc() );
00103 
00104   $helper->execute_update( -SQL    => 'update tab set flag=?',
00105                            -PARAMS => [1] );
00106 
00107 =cut
00108 
00109 sub new {
00110     my ( $class, @args ) = @_;
00111     
00112     my ($db_connection) = rearrange([qw(db_connection)], @args);
00113      
00114     my $self = bless( {}, ref($class) || $class );
00115     throw('-DB_CONNECTION construction parameter was undefined.') 
00116      unless defined $db_connection;
00117     $self->db_connection($db_connection);
00118     
00119     return $self;
00120 }
00121 
00122 =pod
00123 
00124 =head2 db_connection()
00125 
00126   Arg [1]     : Bio::EnsEMBL::DBSQL::DBConnection $db_connection
00127   Description : Sets and retrieves the DBConnection 
00128   Returntype  : Bio::EnsEMBL::DBSQL::DBConnection
00129   Exceptions  : If the object given as a DBConnection is not one or if an 
00130                 attempt is made to set the value more than once
00131   Status      : Stable
00132 
00133 =cut
00134 
00135 sub db_connection {
00136   my ($self, $db_connection) = @_;
00137   if(defined $db_connection) {
00138     if(exists $self->{db_connection}) {
00139       throw('Cannot reset the DBConnection object; already defined ');
00140     }
00141     assert_ref($db_connection, 'Bio::EnsEMBL::DBSQL::DBConnection', 'db_connection');
00142     $self->{db_connection} = $db_connection;
00143     weaken $self->{db_connection};
00144   }
00145   return $self->{db_connection};
00146 }
00147 
00148 # --------- SQL Methods
00149 
00150 =pod
00151 
00152 =head2 execute() - Execute a SQL statement with a custom row handler
00153 
00154   Arg [SQL]             : string SQL to execute
00155   Arg [CALLBACK]        : CodeRef; The callback to use for mapping a row to a data  
00156                           point; leave blank for a default mapping to a 2D array
00157   Arg [USE_HASHREFS]    : boolean If set to true will cause HashRefs to be returned 
00158                           to the callback & not ArrayRefs
00159   Arg [PARAMS]          : ArrayRef The binding parameters to the SQL statement
00160   Arg [PREPARE_PARAMS]  : boolean Parameters to be passed onto the Statement Handle 
00161                           prepare call
00162   Arg [ITERATOR]        : boolean Request a L<Bio::EnsEMBL::Utils::Iterator> 
00163                           rather than a 2D array
00164   Returntype :  ArrayRef/L<Bio::EnsEMBL::Utils::Iterator>
00165   Exceptions :  If errors occur in the execution of the SQL
00166   Status     :  Stable
00167 
00168   my $arr_ref = $helper->execute(
00169     -SQL      => 'select a,b,c from tab where col =?',
00170     -CALLBACK => sub {
00171       my @row = @{ shift @_ };
00172       return { A => $row[0], B => $row[1], C => $row[2] };
00173     },
00174     -PARAMS => ['A'] );
00175 
00176   #Or with hashrefs
00177   my $arr_ref = $helper->execute(
00178     -SQL          => 'select a,b,c from tab where col =?',
00179     -USE_HASHREFS => 1,
00180     -CALLBACK     => sub {
00181       my $row = shift @_;
00182       return { A => $row->{a}, B => $row->{b}, C => $row->{c} };
00183     },
00184     -PARAMS => ['A'] );
00185 
00186 Uses a callback defined by the C<sub> decalaration. Here we specify how
00187 the calling code will deal with each row of a database's result set. The
00188 sub can return any type of Object/hash/data structure you require.
00189 
00190 Should you not specify a callback then a basic one will be assigned to
00191 you which will return a 2D array structure e.g.
00192 
00193   my $arr_ref = $helper->execute(
00194                            -SQL => 'select a,b,c from tab where col =?',
00195                            -PARAMS => ['A'] );
00196 
00197 This is equivalent to DBI's c<selectall_arrayref()> subroutine.
00198 
00199 As an extension to this method you can write a closure subroutine which
00200 takes in two parameters. The first is the array/hash reference & the
00201 second is the statement handle used to execute. 99% of the time you will
00202 not need it but there are occasions where you do need it. An example of
00203 usage would be:
00204 
00205   my $conn = get_conn();    #From somwewhere
00206   my $arr_ref = $conn->execute(
00207     -SQL          => 'select a,b,c from tab where col =?',
00208     -USE_HASHREFS => 1,
00209     -CALLBACK     => sub {
00210       my ( $row, $sth ) = @_;
00211       #Then do something with sth
00212       return { A => $row->[0], B => $row->[1], C => $row->[2] };
00213     },
00214     -PARAMS => ['A'] );
00215 
00216 Any arguments to bind to the incoming statement. This can be a set of scalars
00217 or a 2D array if you need to specify any kind of types of sql objects i.e.
00218 
00219   use DBI qw(:sql_types);
00220 
00221   my $conn = get_conn();
00222   my $arr_ref = $conn->execute(
00223     -SQL =>
00224       'select a,b,c from tab where col =? and num_col=? and other=?',
00225     -USE_HASHREFS => 1,
00226     -CALLBACK     => sub {
00227       my @row = @{ shift @_ };
00228       return { A => $row[0], B => $row[1], C => $row[2] };
00229     },
00230     -PARAMS => [ '1', SQL_VARCHAR ],
00231     [ 2, SQL_INTEGER ],
00232     'hello' );
00233 
00234 Here we import DBI's sql types into our package and then pass in
00235 multiple anonymous array references as parameters. Each param is
00236 tested in the input and if it is detected to be an ARRAY reference we
00237 dereference the array and run DBI's bind_param method. In fact you can
00238 see each part of the incoming paramaters array as the contents to call
00239 C<bind_param> with. The only difference is the package tracks the bind
00240 position for you.
00241 
00242 We can get back a L<Bio::EnsEMBL::Utils::Iterator> object which can be used
00243 to iterate over the results set without first materializing the data into 
00244 memory. An example would be:
00245 
00246    my $iterator = $helper->execute(
00247                            -SQL => 'select a,b,c from tab where col =?',
00248                            -PARAMS => ['A'] 
00249                            -ITERATOR => 1);
00250    while($iterator->has_next()) {
00251      my $row = $iterator->next();
00252      #Do something
00253    }
00254 
00255 This is very useful for very large datasets.
00256 
00257 =cut
00258 
00259 sub execute {
00260     my ( $self, @args ) = @_;
00261     my ($sql, $callback, $use_hashrefs, $params, $prepare_params, $iterator) = 
00262      rearrange([qw(sql callback use_hashrefs params prepare_params iterator)], @args);
00263     my $has_return = 1;
00264     
00265     #If no callback then we execute using a default one which returns a 2D array
00266     if(!defined $callback) {
00267     throw('Cannot use fetchrow_hashref() with default mappers. Turn off this option') if $use_hashrefs;
00268     $callback = $self->_mappers()->{array_ref};
00269     }
00270     
00271     return $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params, $prepare_params, $iterator );
00272 }
00273 
00274 =pod
00275 
00276 =head2 execute_simple()
00277 
00278   Arg [SQL]           : string $sql
00279   Arg [PARAMS]        : ArrayRef $params
00280   Arg [CALLBACK]      : CodeRef $callback
00281   Returntype : ArrayRef of 1D elements
00282   Exceptions : If errors occur in the execution of the SQL
00283   Status     : Stable
00284 
00285   my $classification =
00286     $helper->execute_simple(
00287        -SQL =>
00288          'select meta_val from meta where meta_key =? order by meta_id',
00289        -PARAMS => ['species.classification'] );
00290 
00291 Identical to C<execute> except you do not specify a sub-routine reference. 
00292 Using this code assumes you want an array of single scalar values as returned 
00293 by the given SQL statement.
00294 
00295 =cut
00296 
00297 sub execute_simple {
00298   my ( $self, @args ) = @_;
00299     my ($sql, $params, $callback) = rearrange([qw(sql params callback)], @args);
00300     my $has_return = 1;
00301     my $use_hashrefs = 0;
00302     $callback ||= $self->_mappers()->{first_element};
00303     return $self->_execute($sql, $callback, $has_return, $use_hashrefs, $params);
00304 }
00305 
00306 =pod
00307 
00308 =head2 execute_no_return()
00309 
00310   Arg [SQL]           : string sql
00311   Arg [CALLBACK]      : CodeRef The callback to use for mapping a row to a data point;
00312                         we assume you are assigning into a data structure which
00313                         has requirements other than simple translation into an
00314                         array
00315   Arg [USE_HASHREFS]  : boolean If set to true will cause HashRefs to be returned 
00316                         to the callback & not ArrayRefs
00317   Arg [PARAMS]        : ArrayRef The binding parameters to the SQL statement
00318   Returntype : None
00319   Exceptions : If errors occur in the execution of the SQL
00320   Status     : Stable
00321 
00322 Whilst all other execute methods will return something; this assumes that the
00323 given mapper subroutine will be performing the business of placing values
00324 somewhere or doing something with them.
00325 
00326 There is a huge temptation to nest queries using this method; do not! Execute
00327 the values into an array using one of the other methods then run your subqueries
00328 on them; or make a better first query. SQL is flexible; so use it.
00329 
00330 =cut
00331 
00332 sub execute_no_return {
00333     my ( $self, @args ) = @_;
00334     my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args);
00335     throw('No callback defined but this is a required parameter for execute_no_return()') if ! $callback;
00336     my $has_return = 0;
00337     my $prepare_params = [];
00338     $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params);
00339     return;
00340 }
00341 
00342 =pod
00343 
00344 =head2 execute_into_hash()
00345 
00346   Arg [SQL]           : string $sql
00347   Arg [CALLBACK]      : CodeRef The callback to use for mapping to a value in a hash
00348                         keyed by the first element in your result set; 
00349                         leave blank for a default mapping to a scalar value
00350                         of the second element
00351   Arg [PARAMS]        : The binding parameters to the SQL statement
00352   Returntype : HashRef keyed by column 1 & value is the return of callback
00353   Exceptions : If errors occur in the execution of the SQL
00354   Status     : Stable
00355 
00356 A variant of the execute methods but rather than returning a list of
00357 mapped results this will assume the first column of a returning map &
00358 the calling subroutine will map the remainder of your return as the
00359 hash's key.
00360 
00361 B<This code can handle simple queries to hashes, complex value mappings
00362 and repeated mappings for the same key>.
00363 
00364 For example:
00365 
00366   my $sql    = 'select key, one, two from table where something =?';
00367   my $mapper = sub {
00368     my ( $row, $value ) = @_;
00369     #Ignore field 0 as that is being used for the key
00370     my $obj = Some::Obj->new( one => $row->[1], two => $row->[2] );
00371     return $obj;
00372   };
00373 
00374   my $hash =
00375     $helper->execute_into_hash( -SQL      => $sql,
00376                                 -CALLBACK => $mapper,
00377                                 -PARAMS   => ['val'] );
00378 
00379   #Or for a more simple usage
00380   my $sql = 'select biotype, count(gene_id) from gene group by biotype';
00381   my $biotype_hash = $conn->execute_into_hash( -SQL => $sql );
00382   print $biotype_hash->{protein_coding} || 0, "\n";
00383 
00384 The basic pattern assumes a scenario where you are mapping in a one
00385 key to one value. For more advanced mapping techniques you can use the
00386 second value passed to the subroutine paramater set. This is shown as
00387 C<$value> in the above examples. This value is what is found in the HASH
00388 being populated in the background. So on the first time you encounter it
00389 for the given key it will be undefined. For future invocations it will
00390 be set to the value you gave it. This allows us to setup code like the
00391 following
00392 
00393   my %args = ( -SQL => 'select meta_key, meta_value from meta '
00394                  . 'where meta_key =? order by meta_id',
00395                -PARAMS => ['species.classification'] );
00396 
00397   my $hash = $helper->execute_into_hash(
00398     %args,
00399     -CALLBACK => sub {
00400       my ( $row, $value ) = @_;
00401       $value = [] if !defined $value;
00402       push( @{$value}, $row->[1] );
00403       return $value;
00404     } );
00405 
00406   #OR
00407 
00408   $hash = $helper->execute_into_hash(
00409     %args,
00410     -CALLBACK => sub {
00411       my ( $row, $value ) = @_;
00412       if ( defined $value ) {
00413         push( @{$value}, $row->[1] );
00414         return;
00415       }
00416       my $new_value = [ $row->[1] ];
00417       return $new_value;
00418     } );
00419 
00420 The code understands that returning a defined value means to push this
00421 value into the background hash. In example one we keep on re-inserting
00422 the Array of classifications into the hash. Example two shows an early
00423 return from the callback which indicates to the code we do not have any
00424 value to re-insert into the hash. Of the two methods example one is
00425 clearer but is possibliy slower.
00426 
00427 B<Remember that the row you are given is the full row & not a view of
00428 the reminaing fields.> Therefore indexing for the data you are concerned
00429 with begins at position 1.
00430 
00431 =cut
00432 
00433 sub execute_into_hash {
00434     my ( $self, @args ) = @_;
00435     my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args);
00436     my $hash = {};
00437     
00438     #If no callback then we execute using a default one which sets value to 2nd element
00439     if(!defined $callback) {
00440      $callback = $self->_mappers()->{second_element};
00441     }
00442     
00443     #Default mapper uses the 1st key + something else from the mapper
00444     my $mapper = sub {
00445         my $row = shift @_;
00446         my $key = $row->[0];
00447         my $value = $hash->{$key};
00448         my $new_value = $callback->($row, $value);
00449         if($new_value) {
00450          $hash->{ $key } = $new_value;
00451         }
00452         return;
00453     };
00454     
00455     $self->execute_no_return(
00456       -SQL => $sql, 
00457       -CALLBACK => $mapper,
00458       -PARAMS => $params
00459     );
00460     
00461     return $hash;
00462 }
00463 
00464 =pod
00465 
00466 =head2 execute_single_result()
00467 
00468   Arg [SQL]           : string $sql
00469   Arg [CALLBACK]      : CodeRef The callback to use for mapping a row to a data point; 
00470                         leave blank for a default scalar mapping
00471   Arg [USE_HASHREFS]  : boolean If set to true will cause HashRefs to be returned 
00472                         to the callback & not ArrayRefs
00473   Arg [PARAMS]        : ArrayRef The binding parameters to the SQL statement
00474   Returntype : Scalar
00475   Exceptions : If errors occur in the execution of the SQL, if the query 
00476                returned more than 1 row and if we found no rows.
00477   Status     : Stable
00478 
00479   my $meta_count =
00480     $helper->execute_single_result(
00481                 -SQL => 'select count(*) from meta where species_id =?',
00482                 -PARAMS => [1] );
00483 
00484 Very similar to C<execute()> except it will raise an exception if we have more 
00485 or less than one row returned
00486 
00487 =cut
00488 
00489 sub execute_single_result {
00490     my ( $self, @args ) = @_;
00491     my ($sql, $callback, $use_hashrefs, $params) = rearrange(
00492      [qw(sql callback use_hashrefs params)], @args);
00493     
00494     my $results = $self->execute_simple( 
00495       -SQL => $sql, 
00496       -CALLBACK => $callback, 
00497       -USE_HASHREFS => $use_hashrefs, 
00498       -PARAMS => $params
00499     );
00500     
00501     my $result_count = scalar(@{$results});
00502     if($result_count != 1) {
00503       $params = [] if ! $params;
00504       my $type = ($result_count == 0) ? 'No' : 'Too many';
00505         my $msg = "${type} results returned. Expected 1 but got $result_count for query '${sql}' with params [";
00506         $msg .= join( ',', map {(defined $_) ? $_ : '-undef-';} @{$params} );
00507         $msg .= ']';
00508         throw($msg);
00509     }
00510     return $results->[0];
00511 }
00512 
00513 =pod
00514 
00515 =head2 transaction()
00516 
00517   Arg [CALLBACK]      : CodeRef The callback used for transaction isolation; once 
00518                         the subroutine exists the code will decide on rollback
00519                         or commit. Required
00520   Arg [RETRY]         : integer the number of retries to attempt with this 
00521                         transactional block. Defaults to 0. 
00522   Arg [PAUSE]         : integer the time in seconds to pause in-between retries.
00523                         Defaults to 1.
00524   Arg [CONDITION]     : CodeRef allows you to inspect the exception raised
00525                         and should your callback return true then the 
00526                         retry will be attempted. If not given then all 
00527                         exceptions mean attempt a retry (if specified)
00528   Returntype : Return of the callback
00529   Exceptions : If errors occur in the execution of the SQL
00530   Status     : Stable
00531 
00532   my $val = $helper->transaction(
00533     -CALLBACK => sub {
00534       my ($dbc) = @_;
00535       #Do something
00536       return 1;
00537     } );
00538 
00539   #Or because of the arguments method we use
00540   my $val = $helper->transaction(
00541     sub {
00542       my ($dbc) = @_;
00543       #Do something
00544       return 1;
00545     } );
00546 
00547 Creates a transactional block which will ensure that the connection is
00548 committed when your submmited subroutine has finished or will rollback
00549 in the event of an error occuring in your block.
00550 
00551 The code will always force AutoCommit off but will restore it to its
00552 previous setting. If your DBI/DBD driver does not support manual
00553 commits then this code will break. The code will turn off the
00554 C<disconnect_when_idle()> method to allow transactions to work as
00555 expected.
00556 
00557 An effect of using REPEATABLE READ transaction isolation (InnoDB's
00558 default) is that your data is as fresh as when you started your current
00559 transaction. To ensure the freshest data use C<SELECT ... from ... LOCK
00560 IN SHARE MODE> or C<SELECT ... from ... LOCK FOR UPDATE> if you are
00561 going to issue updates.
00562 
00563 Creating a transaction within a transaction results in the commit
00564 rollback statements occuring in the top level transaction. That way any
00565 block of code which is meant to to be transaction can be wrapped in
00566 this block ( assuming the same instance of SQLHelper is passed around &
00567 used).
00568 
00569 You can also request the retry of a transactional block of code which is
00570 causing problems. This is not a perfect solution as it indicates your
00571 programming model is broken. This mode can be specified as such:
00572 
00573   my $val = $helper->transaction(
00574     -RETRY => 3, -PAUSE => 2,
00575     -CALLBACK => sub {
00576       my ($dbc) = @_;
00577       #Do something
00578       return 1;
00579     } );
00580     
00581 The C<-RETRY> argument indicates the number of times we attempt the transaction 
00582 and C<-PAUSE> indicates the time in-between attempts. These retries will
00583 only occur in the root transaction block i.e. you cannot influence the 
00584 retry system in a sub transaction. You can influence if the retry is done with
00585 the C<-CONDITION> argument which accepts a Code reference (same as the
00586 C<-CALLBACK> parameter). This allows you to inspect the error thrown to
00587 retry only in some situations e.g.
00588 
00589   my $val = $helper->transaction(
00590     -RETRY => 3, -PAUSE => 2,
00591     -CALLBACK => sub {
00592       my ($dbc) = @_;
00593       #Do something
00594       return 1;
00595     },
00596     -CONDITION => sub {
00597       my ($error) = @_;
00598       return ( $error =~ /deadlock/ ) ? 1 : 0;
00599     }
00600   );
00601 
00602 Here we attempt a transaction and will B<only> retry when we have an error
00603 with the phrase deadlock.
00604 
00605 =cut
00606 
00607 sub transaction {
00608   my ($self, @args) = @_;
00609    
00610   my ($callback, $retry, $pause, $condition) = rearrange([qw(callback retry pause condition)], @args);
00611   
00612   throw('-CALLBACK was not a CodeRef. Got a reference of type ['.ref($callback).']. Check your parameters') 
00613     unless check_ref($callback, 'CODE');
00614   
00615   #Setup defaults
00616   $retry = 0 unless defined $retry;
00617   $pause = 1 unless defined $pause;
00618   if(! defined $condition) {
00619     $condition = sub {
00620       return 1;
00621     };
00622   }
00623   
00624   assert_ref($condition, 'CODE', '-CONDITION');
00625  
00626   my $dbc = $self->db_connection();
00627   my $original_dwi;
00628   my $ac;
00629   
00630   my $error;
00631   my $result;
00632   
00633   #If we were already in a transaction then we do not do any management of the
00634   #session & wait for the parent transaction(s) to finish
00635   my $perform_transaction = $self->_perform_transaction_code();
00636   if($perform_transaction) {
00637     ($original_dwi, $ac) = $self->_enable_transaction();
00638   }
00639   else {
00640     #If we were in a transaction then ignore any attempts at retry here
00641     $retry = 0;
00642   }
00643     
00644   for(my $iteration = 0; $iteration <= $retry; $iteration++) {
00645     eval {
00646       $result = $callback->($dbc);
00647       $dbc->db_handle()->commit() if $perform_transaction;
00648     };
00649     $error = $@;
00650     #If we were allowed to deal with the error then we apply rollbacks & then
00651     #retry or leave to the remainder of the code to throw
00652     if($perform_transaction && $error) {
00653       eval { $dbc->db_handle()->rollback(); };
00654       #If we were not on our last iteration then warn & allow the retry
00655       if($iteration != $retry) {
00656         if($condition->($error)) {
00657           warn("Encountered error on attempt ${iteration} of ${retry} and have issued a rollback. Will retry after sleeping for $pause second(s): $error");
00658           sleep $pause;
00659         }
00660         else {
00661           last; #break early if condition of error was not matched
00662         }
00663       }
00664     }
00665     
00666     #Always break the loop if we had a successful attempt
00667     last if ! $error;
00668   }
00669   
00670   if($perform_transaction) {
00671     $self->_disable_transaction($original_dwi, $ac);
00672   }
00673   
00674   throw("ABORT: Transaction aborted because of error: ${error}") if $error;
00675   
00676   return $result;
00677 }
00678 
00679 =pod
00680 
00681 =head2 execute_update()
00682 
00683   Arg [SQL]           : string $sql
00684   Arg [CALLBACK]      : CodeRef The callback to use for calling methods on the 
00685                         DBI statement handle or DBConnection object after an 
00686                         update command
00687   Arg [PARAMS]        : ArrayRef The binding parameters to the SQL statement
00688   Arg [PREPARE_PARAMS] : ArrayRef Parameters to bind to the prepare() StatementHandle call
00689   Returntype : Number of rows affected
00690   Exceptions : If errors occur in the execution of the SQL
00691   Status     : Stable
00692 
00693 Used for performing updates but conforms to the normal execute statement
00694 subroutines.
00695 
00696   use DBI qw(:sql_types);
00697   $helper->execute_update(-SQL => 'update tab set name = ? where id =?',
00698                           -PARAMS => [ 'andy', [ 1, SQL_INTEGER ] ] );
00699 
00700 If you need to do something a bit more advanced with your DML then you can
00701 give the method a closure and this will be called after the execute has been
00702 issued i.e.
00703 
00704   my $obj;
00705   $helper->execute_update(
00706     -SQL      => 'insert into tab (name) values(?)',
00707     -CALLBACK => sub {
00708       my ( $sth, $dbh ) = @_;
00709       $obj->{id} = $dbh->{mysql_insertid};
00710     },
00711     -PARAMS => [ $obj->name() ] );
00712 
00713 This lets us access the statement handle & database handle to access other
00714 properties such as the last identifier inserted.
00715 
00716 =cut
00717 
00718 sub execute_update {
00719   my ($self, @args) = @_;
00720   my ($sql, $callback, $params, $prepare_params) = rearrange([qw(sql callback params prepare_params)], @args);
00721   my $rv = 0;
00722   my $sth;
00723   eval {
00724     my @prepare_params;
00725     @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
00726     $sth = $self->db_connection()->prepare($sql, @prepare_params);
00727     $self->_bind_params($sth, $params);
00728     $rv = $sth->execute();
00729     $callback->($sth, $self->db_connection()->db_handle()) if $callback;
00730   };
00731   my $error = $@;
00732   $self->_finish_sth($sth);
00733   if($error) {
00734     my $params = join ' ', map { (defined $_) ? $_ : q{undef} } @{$params};
00735     throw("Cannot apply sql '${sql}' with params '${params}': ${error}");
00736   }
00737   return $rv;
00738 }
00739 
00740 =head2 execute_with_sth()
00741 
00742   Arg [SQL]             : string $sql
00743   Arg [CALLBACK]        : CodeRef The callback to use for working with the statement
00744                           handle once returned. This is B<not> a mapper.
00745   Arg [PARAMS]          : ArrayRef The binding parameters to the SQL statement
00746   Arg [PREPARE_PARAMS]  : ArrayRef Used to pass parameters to the statement handle 
00747                           prepare method
00748   Description : A subrotuine which abstracts resource handling and statement
00749                 preparing leaving the developer to define how to handle
00750                 and process the statement.
00751   Returntype  : Anything you wish to return from the callback
00752   Exceptions  : If errors occur in the execution of the SQL
00753   Status      : Stable
00754 
00755   my $meta_count = $helper->execute_with_sth(
00756     -SQL      => 'select count(*) from meta where species_id =?',
00757     -PARAMS   => [1],
00758     -CALLBACK => sub {
00759       my ($sth) = @_;
00760       my $count;
00761       $sth->bind_columns( \$count );
00762       while ( $sth->fetch ) {
00763         print $count, "\n";
00764       }
00765       return $count;
00766     } );
00767 
00768 Very similar to C<execute()> except this gives you full control over the
00769 lifecycle of the statement handle & how you wish to proceed with working
00770 with a statement handle. This is for situations where you believe going through
00771 the mappers causes too much of a slow-down (since we have to execute a
00772 subroutine for every row in order to map it correctly).
00773 
00774 However please benchmark before adopting this method as it increases the 
00775 complexity of your code and the mapper slow down only becomes apparent when
00776 working with very large numbers of rows.
00777 
00778 =cut
00779 
00780 sub execute_with_sth {
00781   my ($self, @args) = @_;
00782   my ($sql, $callback, $params, $prepare_params) = rearrange([qw(sql callback params prepare_params)], @args);
00783   return $self->_base_execute( $sql, 1, $params, $callback, $prepare_params );
00784 }
00785 
00786 =pod
00787 
00788 =head2 batch()
00789 
00790   Arg [SQL]           : string $sql
00791   Arg [CALLBACK]      : CodeRef The callback to use for working with the statement
00792                         handle once returned; specify this or -DATA
00793   Arg [DATA]          : ArrayRef The data to insert; specify this or -CALLBACK
00794   Arg [COMMIT_EVERY]  : Integer defines the rate at which to issue commits to
00795                         the DB handle. This is important when working with 
00796                         InnoDB databases since it affects the speed of rollback
00797                         (larger gaps inbetween commits means more to rollback).
00798                         
00799                         Ignored if using the callback version.
00800   Arg [PREPARE_PARAMS]  : ArrayRef Used to pass parameters to the statement handle 
00801                           prepare method
00802   Returntype : integer rows updated
00803   Exceptions : If errors occur in the execution of the SQL
00804   Status     : Stable
00805 
00806   my $alotofdata = getitfromsomewhere();
00807   $helper->batch(
00808     -SQL      => 'insert into table (one,two) values(?,?)',
00809     -CALLBACk => sub {
00810       my ( $sth, $dbc ) = @_;
00811       foreach my $data (@alotofdata) {
00812         $sth->execute( @{$data} );
00813       }
00814     } );
00815 
00816   #Or for a 2D array data driven approach
00817   $helper->batch( -SQL  => 'insert into table (one,two) values(?,?)',
00818                   -DATA => $alotofdata );
00819 
00820 Takes in a sql statement & a code reference. Your SQL is converted into a 
00821 prepared statement & then given as the first parameter to the closure. The
00822 second parameter is the DBH which created the statement. This is intended
00823 to let you do mass insertion into a database without the need to
00824 re-preparing the same statement.
00825 
00826 This can be combined with the transaction() code to provide a construct
00827 which does batch insertion & is transactionally aware.
00828 
00829 We can also use data based batch insertions i.e.
00830 
00831   #Needs to be like:
00832   #   [ [1,2], [3,4] ]
00833   #Or if using the DBI types:
00834   #  [ [ [ 1, SQL_INTEGER ], [ 2, SQL_INTEGER ] ],
00835   #    [ [ 3, SQL_INTEGER ], [ 4, SQL_INTEGER ] ] ];
00836 
00837   my $alotofdata = getitfromsomewhere();
00838   $helper->batch( -SQL  => 'insert into table (one,two) values(?,?)',
00839                   -DATA => $alotofdata );
00840 
00841 This does exactly what the previous example.
00842 
00843 All batch statements will return the value the callback computes. If you are 
00844 using the previous example with a data array then the code will return the
00845 number affected rows by the query.
00846 
00847 =cut
00848 
00849 sub batch {
00850   my ($self, @args) = @_;
00851   my ($sql, $callback, $data, $commit_every, $prepare_params) = 
00852     rearrange([qw(sql callback data commit_every prepare_params)], @args);
00853   
00854   if(! defined $callback && ! defined $data) {
00855     throw('You need to define a callback for insertion work or the 2D data array');
00856   }
00857   
00858   my $result;
00859   if(defined $callback) {
00860     $result = $self->_callback_batch($sql, $callback, $prepare_params);
00861   }
00862   else {
00863     $result = $self->_data_batch($sql, $data, $commit_every, $prepare_params);
00864   }
00865   return $result if defined $result;
00866   return;
00867 }
00868 
00869 #------- Internal methods
00870 
00871 sub _mappers {
00872   my ($self) = @_;
00873   if(! exists $self->{_mappers}) {
00874     $self->{_mappers} = {
00875       first_element => sub {
00876         my ($row) = @_;
00877         return $row->[0];
00878       },
00879       second_element => sub {
00880         my ($row) = @_;
00881         return $row->[1];
00882       },
00883       array_ref => sub {
00884         my $row = shift @_;
00885          return [@{$row}];
00886       }
00887     };
00888   }
00889   return $self->{_mappers};
00890 }
00891 
00892 sub _perform_transaction_code {
00893   my ($self) = @_;
00894   return $self->{_transaction_active}->{$PROCESS_ID} ? 0 : 1;
00895 }
00896 
00897 sub _enable_transaction {
00898   my ($self) = @_;
00899   my $dbc = $self->db_connection();
00900   my $original_dwi = $dbc->disconnect_when_inactive();
00901   $dbc->disconnect_when_inactive(0);
00902   my $ac = $dbc->db_handle()->{'AutoCommit'};
00903   $dbc->db_handle()->{'AutoCommit'} = 0;
00904   $self->{_transaction_active}->{$PROCESS_ID} = 1;
00905   return ($original_dwi, $ac);
00906 }
00907 
00908 sub _disable_transaction {
00909   my ($self, $original_dwi, $ac) = @_;
00910   my $dbc = $self->db_connection();
00911   $dbc->db_handle()->{'AutoCommit'} = $ac;
00912   $dbc->disconnect_when_inactive($original_dwi);
00913   delete $self->{_transaction_active}->{$PROCESS_ID};
00914   return;
00915 }
00916 
00917 sub _bind_params {
00918     my ( $self, $sth, $params ) = @_;
00919     
00920     return if ! defined $params; #Return quickly if we had no data
00921     
00922     if(! check_ref($params, 'ARRAY')) {
00923       throw(qq{The given parameters reference '${params}' is not an ARRAY; wrap in an ArrayRef});
00924     }
00925     
00926     my $count = 1;
00927     foreach my $param (@{$params}) {
00928         if ( check_ref($param, 'ARRAY') ) {
00929             $sth->bind_param( $count, @{$param} );
00930         }
00931         else {
00932             $sth->bind_param( $count, $param );
00933         }
00934         $count++;
00935     }
00936     return;
00937 }
00938 
00939 sub _execute {
00940     my ( $self, $sql, $callback, $has_return, $use_hashrefs, $params, $prepare_params, $iterator ) = @_;
00941 
00942     throw('Not given a mapper. _execute() must always been given a CodeRef') unless check_ref($callback, 'CODE');
00943       
00944   my $iter = $self->_base_execute($sql, $has_return, $params, $callback, $prepare_params, $use_hashrefs);
00945   
00946   if($has_return) {
00947     return $iter if $iterator;
00948     return $iter->to_arrayref();
00949   }
00950   else {
00951     #Force iteration if we had no return since the caller is expecting this
00952     $iter->each(sub {});
00953   }
00954   return;
00955 }
00956 
00957 sub _base_execute {
00958   my ( $self, $sql, $has_return, $params, $callback, $prepare_params, $use_hashrefs ) = @_;
00959   
00960   throw('Not given a callback. _base_execute() must always been given a CodeRef') unless check_ref($callback, 'CODE');
00961     
00962     $params = [] unless $params;
00963     
00964     my $conn = $self->db_connection;
00965 
00966     my $error;
00967     my $sth_close_error;
00968     my $sth;
00969     my $iterator;
00970 
00971     eval {
00972       my @prepare_params;
00973       @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
00974         $sth = $conn->prepare($sql, @prepare_params);
00975         throw("Cannot continue as prepare() did not return a handle with prepare params '@prepare_params'") 
00976           unless $sth;
00977         $self->_bind_params( $sth, $params );
00978         $sth->execute();
00979     my $sth_processor;
00980     if($use_hashrefs) {
00981       $sth_processor = sub {
00982         while( my $row = $sth->fetchrow_hashref() ) {
00983             my $v = $callback->($row, $sth);
00984             return $v if $has_return;
00985           }
00986           $self->_finish_sth($sth);
00987         return undef;
00988       };
00989     }
00990     else {
00991       $sth_processor = sub {
00992         while( my $row = $sth->fetchrow_arrayref() ) {
00993                 my $v = $callback->($row, $sth);
00994             return $v if $has_return;
00995             }
00996             $self->_finish_sth($sth);
00997         return undef;
00998       };
00999     }
01000         $iterator = Bio::EnsEMBL::Utils::Iterator->new($sth_processor);
01001     };
01002     
01003     $error = $@;
01004     if($error) {
01005     throw("Cannot run '${sql}' with params '@{$params}' due to error: $error") if $error;
01006     }
01007     return $iterator;
01008 }
01009 
01010 sub _finish_sth {
01011   my ($self, $sth) = @_;
01012   eval { $sth->finish() if defined $sth; };
01013   warn('Cannot finish() the statement handle: $@') if $@;
01014   return;
01015 }
01016 
01017 sub _callback_batch {
01018   my ($self, $sql, $callback, $prepare_params) = @_;
01019   my $error;
01020   my $sth;
01021   my $closure_return;
01022   eval {
01023     my @prepare_params;
01024     @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
01025     $sth = $self->db_connection()->prepare($sql, @prepare_params); 
01026     $closure_return = $callback->($sth, $self->db_connection());
01027   };
01028   $error = $@;
01029   $self->_finish_sth($sth);
01030     throw("Problem detected during batch work: $error") if $error;
01031   
01032   return $closure_return if defined $closure_return;
01033   return;
01034 }
01035 
01036 sub _data_batch {
01037   my ($self, $sql, $data, $commit_every, $prepare_params) = @_;
01038   
01039   #Input checks
01040   assert_ref($data, 'ARRAY', '-DATA');
01041   my $data_length = scalar(@{$data});
01042   return 0 unless $data_length > 0;
01043   my $first_row = $data->[0];
01044   throw('I expect to work with a 2D ArrayRef but this is not one') unless check_ref($first_row, 'ARRAY');
01045 
01046   my $callback = sub {
01047     my ($sth, $dbc) = @_;
01048     my $total_affected = 0;
01049     #Iterate over each data point
01050     for(my $data_index = 0; $data_index < $data_length; $data_index++) {
01051       my $row = $data->[$data_index];
01052       $self->_bind_params($sth, $row);
01053       my $affected = eval {$sth->execute()};
01054       if($@) {
01055         throw("Problem working with $sql with params @{$row}: $@");
01056       }
01057       my $num_affected = ($affected) ? $affected :  0; #Get around DBI's 0E0
01058       $total_affected += $num_affected;
01059       
01060       #Lets us do a commit once every x rows apart from 0. We also finish
01061       #off with a commit if the code told us we were doing it
01062       if($commit_every) {
01063         if( ($data_index % $commit_every == 0) && $data_index != 0) {
01064           $dbc->db_handle()->commit();
01065         }
01066       }
01067     }
01068     
01069     #finish off with a commit if the code told us we were doing it
01070     if($commit_every) {
01071       $dbc->db_handle()->commit();
01072     }
01073     
01074     return $total_affected || 0;
01075   };
01076   
01077   return $self->_callback_batch($sql, $callback, $prepare_params)
01078 }
01079 
01080 1;