Archive Ensembl HomeArchive Ensembl Home
CliHelper.pm
Go to the documentation of this file.
00001 
00002 =head1 LICENSE
00003 
00004   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00005   Genome Research Limited.  All rights reserved.
00006 
00007   This software is distributed under a modified Apache license.
00008   For license details, please see
00009 
00010     http://www.ensembl.org/info/about/code_licence.html
00011 
00012 =head1 CONTACT
00013 
00014   Please email comments or questions to the public Ensembl
00015   developers list at <dev@ensembl.org>.
00016 
00017   Questions may also be sent to the Ensembl help desk at
00018   <helpdesk@ensembl.org>.
00019 
00020 =cut
00021 
00022 =head1 NAME
00023 
00024 Bio::EnsEMBL::Utils::CliHelper
00025 
00026 =head1 VERSION
00027 
00028 $Revision: 1.1 $
00029 
00030 =head1 SYNOPSIS
00031 
00032   use Bio::EnsEMBL::Utils::CliHelper;
00033 
00034   my $cli = Bio::EnsEMBL::Utils::CliHelper->new();
00035 
00036   # get the basic options for connecting to a database server
00037   my $optsd = $cli->get_dba_opts();
00038 
00039   # add the print option
00040   push(@$optsd,"print|p");
00041 
00042   # process the command line with the supplied options plus a reference to a help subroutine
00043   my $opts = $cli->process_args($optsd,\&usage);
00044   
00045   # use the command line options to get an array of database details
00046   for my $db_args (@{$cli->get_dba_args_for_opts($opts)}) {
00047     # use the args to create a DBA
00048     my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{$db_args});
00049     ...
00050   }
00051   
00052   For adding secondary databases, a prefix can be supplied. For instance, to add a second set of
00053   db params prefixed with dna (-dnahost -dnaport etc.), use the prefix argument with get_dba_opts and
00054   get_dba_args_for_opts:
00055   # get the basic options for connecting to a database server
00056   my $optsd =
00057    [ @{ $cli_helper->get_dba_opts() }, @{ $cli_helper->get_dba_opts('dna') } ];
00058   # process the command line with the supplied options plus a help subroutine
00059   my $opts = $cli_helper->process_args( $optsd, \&usage );
00060   # get the dna details
00061   my ($dna_dba_details) =
00062     @{ $cli_helper->get_dba_args_for_opts( $opts, 1, 'dna' ) };
00063   my $dna_db =
00064     Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$dna_dba_details} );
00065 
00066 =head1 DESCRIPTION
00067 
00068 Utilities for a more consistent approach to parsing and handling EnsEMBL script command lines
00069 
00070 =head1 METHODS
00071 
00072 See subroutines.
00073 
00074 =cut
00075 
00076 package Bio::EnsEMBL::Utils::CliHelper;
00077 
00078 use warnings;
00079 use strict;
00080 
00081 use Carp;
00082 use Data::Dumper;
00083 use Getopt::Long qw(:config auto_version no_ignore_case);
00084 
00085 use Bio::EnsEMBL::DBSQL::DBConnection;
00086 use Bio::EnsEMBL::DBSQL::DBAdaptor;
00087 
# Table of the standard database connection options. Each entry holds the
# accepted option names (joined with '|' by get_dba_opts(), so the first name
# is the primary one and the rest are aliases) and the Getopt::Long type
# suffix that is appended to the joined names.
my $dba_opts = [
    { args => [qw(host dbhost h)],     type => '=s' },
    { args => [qw(port dbport P)],     type => ':i' },
    { args => [qw(user dbuser u)],     type => '=s' },
    { args => [qw(pass dbpass p)],     type => ':s' },
    { args => [qw(dbname)],            type => ':s' },
    { args => [qw(pattern dbpattern)], type => ':s' },
    { args => [qw(driver)],            type => ':s' },
    { args => [qw(species_id)],        type => ':i' },
];
00105 
00106 =head2 new()
00107 
00108   Description : Construct a new instance of a CliHelper object
00109   Returntype  : Bio::EnsEMBL::Utils::CliHelper
00110   Status      : Under development
00111 
00112 =cut
00113 
# Constructor: returns a fresh, empty CliHelper object. Any extra arguments
# are ignored.
sub new {
    my ($invocant) = @_;

    # Works both as a class method and when called on an existing instance,
    # in which case the new object is blessed into the instance's class.
    my $package = ref($invocant) || $invocant;
    return bless {}, $package;
}
00119 
00120 =head2 get_dba_opts()
00121 
00122   Arg [1]     : Optional prefix for the option names e.g. dna (giving -dnahost, -dnaport etc.)
00123   Description : Retrieves the standard options for connecting to one or more Ensembl databases
00124   Returntype  : Arrayref of option definitions
00125   Status      : Under development
00126 
00127 =cut
00128 
# Render the standard connection option table as Getopt::Long specification
# strings, e.g. 'host|dbhost|h=s'.
#   $prefix - optional string prepended to every option name and alias
# Returns an arrayref of specification strings.
sub get_dba_opts {
    my ( $self, $prefix ) = @_;
    $prefix ||= '';

    my @specs;
    for my $definition ( @{$dba_opts} ) {

        # Prefix each name/alias, join them with '|' and append the type.
        my @names = map { $prefix . $_ } @{ $definition->{args} };
        push @specs, join( '|', @names ) . $definition->{type};
    }
    return \@specs;
}
00138 
00139 =head2 process_args()
00140 
00141     Arg [1]     : Arrayref of supported command line options (e.g. from get_dba_opts)
00142     Arg [2]     : Ref to subroutine to be invoked when -help or -? is supplied
00143     Description : Parses the command line with Getopt::Long using the supplied option definitions plus a help|? option
00144     Returntype  : Hashref of parsed options
00145     Status      : Under development
00146 
00147 =cut
00148 
# Parse the command line with Getopt::Long.
#   $opts_def  - arrayref of Getopt::Long option specifications
#   $usage_sub - optional code ref bound to the help|? option
# Returns a hashref of the parsed option values.
# Croaks if GetOptions reports a parse failure.
sub process_args {
    my ( $self, $opts_def, $usage_sub ) = @_;
    my $opts = {};

    # Work on a private copy so the caller's definitions are not modified;
    # this keeps repeated calls, or reuse of the same arrayref, from
    # accumulating duplicate help entries.
    my @spec = @{ $opts_def || [] };

    # Only bind help|? to the usage sub when one was supplied; GetOptions
    # rejects an undefined entry in the option list. Without a sub the help
    # flag is simply recorded in the returned hash.
    push @spec, ( defined $usage_sub ? ( q/help|?/ => $usage_sub ) : q/help|?/ );

    GetOptions( $opts, @spec )
      or croak 'Could not parse command line arguments';
    return $opts;
}
00157 
00158 =head2 get_dba_args_for_opts()
00159 
00160     Arg [1]     : Hash of options (e.g. parsed from command line options by process_args())
00161     Arg [2]     : If set to 1, the databases are assumed to have a single species only. Default is 0.
00162     Arg [3]     : Optional prefix to use when parsing e.g. dna
00163     Description : Uses the parsed command line options to generate an array of DBAdaptor arguments 
00164                 : (e.g. expands dbpattern, finds all species_ids for multispecies databases)
00165                 : These can then be passed directly to Bio::EnsEMBL::DBSQL::DBAdaptor->new()
00166     Returntype  : Arrayref of DBA argument hash refs 
00167     Status      : Under development
00168 
00169 =cut
00170 
# Expand parsed command line options into one DBAdaptor argument hash per
# database/species combination.
#   $opts           - hashref of parsed options (e.g. from process_args())
#   $single_species - if true, skip the meta table lookup and use species_id 1
#   $prefix         - optional prefix applied to every option name (e.g. 'dna')
# Returns an arrayref of hashrefs that can be passed to
# Bio::EnsEMBL::DBSQL::DBAdaptor->new().
# Croaks when neither a dbname nor a pattern option was supplied.
sub get_dba_args_for_opts {
    my ( $self, $opts, $single_species, $prefix ) = @_;
    $prefix         ||= '';
    $single_species ||= 0;

    # Every option key, species_id included, carries the prefix so that a
    # secondary set of options (e.g. 'dna') is read consistently.
    my (
        $host_key,   $port_key,    $user_key,   $pass_key,
        $dbname_key, $pattern_key, $driver_key, $species_id_key
      )
      = map { $prefix . $_ }
      qw(host port user pass dbname pattern driver species_id);

    my $dbc =
      Bio::EnsEMBL::DBSQL::DBConnection->new( -USER   => $opts->{$user_key},
                                              -PASS   => $opts->{$pass_key},
                                              -HOST   => $opts->{$host_key},
                                              -PORT   => $opts->{$port_key},
                                              -DRIVER => $opts->{$driver_key} );

    my @dbnames;
    if ( defined $opts->{$pattern_key} ) {

        # Use the connection to list the databases on the server and keep
        # those matching the (prefixed) pattern option. The pattern is
        # compiled once rather than on every candidate name.
        my $name_filter = qr/$opts->{$pattern_key}/smx;
        @dbnames =
          grep { $_ =~ $name_filter }
          @{ $dbc->sql_helper()->execute_simple(q/SHOW DATABASES/) };
    } elsif ( defined $opts->{$dbname_key} ) {
        @dbnames = ( $opts->{$dbname_key} );
    } else {

        # The options are deliberately not dumped here: they include the
        # database password.
        croak 'dbname or dbpattern arguments required';
    }

    my @db_args;
    for my $database (@dbnames) {

        my $multi       = 0;
        my @species_ids = qw/1/;
        if ( !$single_species ) {

            # Find the species ids recorded in this database's meta table;
            # more than one marks the database as multispecies.
            @species_ids = @{
                $dbc->sql_helper()->execute_simple(
"SELECT DISTINCT(species_id) FROM ${database}.meta WHERE species_id>0" ) };
            if ( scalar(@species_ids) > 1 ) {
                $multi = 1;
            }

            # An explicit species_id option narrows the result to that one
            # species; the multispecies flag computed above is kept.
            if ( defined $opts->{$species_id_key} ) {
                @species_ids = ( $opts->{$species_id_key} );
            }
        }

        for my $id (@species_ids) {
            push @db_args, {
                -HOST            => $opts->{$host_key},
                -USER            => $opts->{$user_key},
                -PORT            => $opts->{$port_key},
                -PASS            => $opts->{$pass_key},
                -DBNAME          => $database,
                -DRIVER          => $opts->{$driver_key},
                -SPECIES_ID      => $id,
                -MULTISPECIES_DB => $multi };
        }
    } ## end for my $database (@dbnames)
    return \@db_args;
} ## end sub get_dba_args_for_opts
00226 
00227 =head2 get_dbas_for_opts()
00228 
00229     Arg [1]     : Hash of options (e.g. parsed from command line options by process_args())
00230     Arg [2]     : If set to 1, the databases are assumed to have a single species only. Default is 0.
00231     Arg [3]     : Optional prefix to use when parsing e.g. dna
00232     Description : Uses the parsed command line options to generate an array of DBAdaptors.
00233                 : Note this can overload connections on a server
00234     Returntype  : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor
00235     Status      : Under development
00236 
00237 =cut
00238 
# Create a Bio::EnsEMBL::DBSQL::DBAdaptor for every argument set produced by
# get_dba_args_for_opts(); the three arguments are passed straight through.
#   $opts           - hashref of parsed options
#   $single_species - forwarded to get_dba_args_for_opts()
#   $prefix         - forwarded to get_dba_args_for_opts()
# Returns an arrayref of DBAdaptor objects.
sub get_dbas_for_opts {
    my ( $self, $opts, $single_species, $prefix ) = @_;

    my $arg_sets =
      $self->get_dba_args_for_opts( $opts, $single_species, $prefix );

    # Start from a real array so that an arrayref is always returned, even
    # when there are no argument sets; callers can dereference the result
    # without first checking for undef.
    my @dbas;
    for my $args ( @{$arg_sets} ) {
        push @dbas, Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$args} );
    }
    return \@dbas;
}
00249 1;