Archive Ensembl HomeArchive Ensembl Home
ConversionSupport.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
00024 schema conversion scripts
00025 
00026 =head1 SYNOPSIS
00027 
00028   my $serverroot = '/path/to/ensembl';
00029   my $support = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);
00030 
00031   # parse common options
00032   $support->parse_common_options;
00033 
00034   # parse extra options for your script
00035   $support->parse_extra_options( 'string_opt=s', 'numeric_opt=n' );
00036 
00037   # ask user if he wants to run script with these parameters
00038   $support->confirm_params;
00039 
00040   # see individual method documentation for more stuff
00041 
00042 =head1 DESCRIPTION
00043 
00044 This module is a collection of common methods and provides helper
00045 functions for the Vega release and schema conversion scripts. Amongst
00046 others, it reads options from a config file, parses commandline options
00047 and does logging.
00048 
00049 =head1 METHODS
00050 
00051 =cut
00052 
00053 package Bio::EnsEMBL::Utils::ConversionSupport;
00054 
00055 use strict;
00056 use warnings;
00057 no warnings 'uninitialized';
00058 
00059 use Getopt::Long;
00060 use Text::Wrap;
00061 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00062 use FindBin qw($Bin $Script);
00063 use POSIX qw(strftime);
00064 use Cwd qw(abs_path);
00065 use DBI;
00066 use Data::Dumper;
00067 
00068 my $species_c = 1; #counter to be used for each database connection made
00069 
00070 =head2 new
00071 
00072   Arg[1]      : String $serverroot - root directory of your ensembl sandbox
00073   Example     : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
00074                                         '/path/to/ensembl');
00075   Description : constructor
00076   Return type : Bio::EnsEMBL::Utils::ConversionSupport object
00077   Exceptions  : thrown if no serverroot is provided
00078   Caller      : general
00079 
00080 =cut
00081 
00082 sub new {
00083   my $class = shift;
00084   (my $serverroot = shift) or throw("You must supply a serverroot.");
00085   my $self = {
00086     '_serverroot'   => $serverroot,
00087     '_param'        => { interactive => 1 },
00088     '_warnings'     => 0,
00089   };
00090   bless ($self, $class);
00091   return $self;
00092 }
00093 
00094 =head2 parse_common_options
00095 
00096   Example     : $support->parse_common_options;
00097   Description : This method reads options from a configuration file and parses
00098                 some commandline options that are common to all scripts (like
00099                 db connection settings, help, dry-run). Commandline options
00100                 will override config file settings. 
00101 
00102                 All options will be accessible via $self->param('name').
00103   Return type : true on success 
00104   Exceptions  : thrown if configuration file can't be opened
00105   Caller      : general
00106 
00107 =cut
00108 
00109 sub parse_common_options {
00110   my $self = shift;
00111 
00112   # read commandline options
00113   my %h;
00114   Getopt::Long::Configure("pass_through");
00115   &GetOptions( \%h,
00116            'dbname|db_name=s',
00117            'host|dbhost|db_host=s',
00118            'port|dbport|db_port=n',
00119            'user|dbuser|db_user=s',
00120            'pass|dbpass|db_pass=s',
00121            'conffile|conf=s',
00122            'logfile|log=s',
00123                'nolog',
00124            'logpath=s',
00125                'log_base_path=s',
00126            'logappend|log_append',
00127            'verbose|v',
00128            'interactive|i=s',
00129            'dry_run|dry|n',
00130            'help|h|?',
00131          );
00132 
00133   # reads config file
00134   my $conffile = $h{'conffile'} || $self->serverroot . "/sanger-plugins/vega/conf/ini-files/Conversion.ini";
00135   $conffile = abs_path($conffile);
00136   if (-e $conffile) {
00137     open(CONF, $conffile) or throw( 
00138       "Unable to open configuration file $conffile for reading: $!");
00139     my $serverroot = $self->serverroot;
00140     while (<CONF>) {
00141       chomp;
00142 
00143       # remove comments
00144       s/^[#;].*//;
00145       s/\s+[;].*$//;
00146 
00147       # read options into internal parameter datastructure, removing whitespace
00148       next unless (/(\w\S*)\s*=\s*(\S*)\s*/);
00149       my $name = $1;
00150       my $val = $2;
00151       if ($val =~ /\$SERVERROOT/) {
00152     $val =~ s/\$SERVERROOT/$serverroot/g;
00153     $val = abs_path($val);
00154       }
00155       $self->param($name, $val);
00156     }
00157     $self->param('conffile', $conffile);
00158   }
00159   elsif ($conffile) {
00160     warning("Unable to open configuration file $conffile for reading: $!");
00161   }
00162 
00163 # override configured parameter with commandline options
00164   map { $self->param($_, $h{$_}) } keys %h;
00165 
00166 
00167   return (1) if $self->param('nolog');
00168 
00169   # if logpath & logfile are not set, set them here to /ensemblweb/vega_dev/shared/logs/conversion/DBNAME/SCRIPNAME_NN.log
00170   if (! defined($self->param('log_base_path')))  {
00171     $self->param('log_base_path','/ensemblweb/shared/logs/conversion/');
00172   }
00173   my $dbname = $self->param('dbname');
00174   $dbname =~ s/^vega_//;
00175   if (not (defined($self->param('logpath')))){
00176     $self->param('logpath', $self->param('log_base_path')."/".$dbname."/" );
00177   }
00178   if ( not defined $self->param('logfile') ){
00179     my $log = $Script;
00180     $log =~ s/.pl$//g;
00181     my $counter;
00182     for ($counter=1 ; (-e $self->param('logpath')."/".$log."_".sprintf("%03d", $counter).".log"); $counter++){
00183 #        warn  $self->param('logpath')."/".$log."_".$counter.".log";
00184     }
00185     $self->param('logfile', $log."_".sprintf("%03d", $counter).".log");
00186   }
00187   return(1);
00188 }
00189 
00190 =head2 parse_extra_options
00191 
00192   Arg[1-N]    : option descriptors that will be passed on to Getopt::Long
00193   Example     : $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
00194   Description : Parse extra commandline options by passing them on to
00195                 Getopt::Long and storing parameters in $self->param('name).
00196   Return type : true on success
00197   Exceptions  : none (caugth by $self->error)
00198   Caller      : general
00199 
00200 =cut
00201 
00202 sub parse_extra_options {
00203   my ($self, @params) = @_;
00204   Getopt::Long::Configure("no_pass_through");
00205   eval {
00206     # catch warnings to pass to $self->error
00207     local $SIG{__WARN__} = sub { die @_; };
00208     &GetOptions(\%{ $self->{'_param'} }, @params);
00209   };
00210   $self->error($@) if $@;
00211   return(1);
00212 }
00213 
00214 =head2 allowed_params
00215 
00216   Arg[1-N]    : (optional) List of allowed parameters to set
00217   Example     : my @allowed = $self->allowed_params(qw(param1 param2));
00218   Description : Getter/setter for allowed parameters. This is used by
00219                 $self->confirm_params() to avoid cluttering of output with
00220                 conffile entries not relevant for a given script. You can use
00221                 $self->get_common_params() as a shortcut to set them.
00222   Return type : Array - list of allowed parameters
00223   Exceptions  : none
00224   Caller      : general
00225 
00226 =cut
00227 
00228 sub allowed_params {
00229   my $self = shift;
00230 
00231   # setter
00232   if (@_) {
00233     @{ $self->{'_allowed_params'} } = @_;
00234   }
00235 
00236   # getter
00237   if (ref($self->{'_allowed_params'}) eq 'ARRAY') {
00238     return @{ $self->{'_allowed_params'} };
00239   } else {
00240     return ();
00241   }
00242 }
00243 
00244 =head2 get_common_params
00245 
00246   Example     : my @allowed_params = $self->get_common_params, 'extra_param';
00247   Description : Returns a list of commonly used parameters in the conversion
00248                 scripts. Shortcut for setting allowed parameters with
00249                 $self->allowed_params().
00250   Return type : Array - list of common parameters
00251   Exceptions  : none
00252   Caller      : general
00253 
00254 =cut
00255 
00256 sub get_common_params {
00257   return qw(
00258         conffile
00259         dbname
00260         host
00261         port
00262         user
00263         pass
00264             nolog
00265         logpath
00266             log_base_path
00267         logfile
00268         logappend
00269         verbose
00270         interactive
00271         dry_run
00272       );
00273 }
00274 
00275 =head2 get_loutre_params
00276 
00277   Arg         : (optional) return a list to parse or not
00278   Example     : $support->parse_extra_options($support->get_loutre_params('parse'))
00279   Description : Returns a list of commonly used loutre db parameters - parse option is
00280                 simply used to distinguish between reporting and parsing parameters
00281   Return type : Array - list of common parameters
00282   Exceptions  : none
00283   Caller      : general
00284 
00285 =cut
00286 
00287 sub get_loutre_params {
00288   my ($self,$p) = @_;
00289   if ($p) {
00290     return qw(
00291           loutrehost=s
00292           loutreport=s
00293           loutreuser=s
00294           loutrepass=s
00295           loutredbname=s
00296         );
00297   }
00298   else {
00299     return qw(
00300           loutrehost
00301           loutreport
00302           loutreuser
00303           loutrepass
00304           loutredbname
00305         );
00306   }
00307 }
00308 
00309 =head2 remove_vega_params
00310 
00311   Example     : $support->remove_vega_params;
00312   Description : Removes Vega db conection parameters. Usefull to avoid clutter in log files when
00313                 working exclusively with loutre
00314   Return type : none
00315   Exceptions  : none
00316   Caller      : general
00317 
00318 =cut
00319 
00320 sub remove_vega_params {
00321   my $self = shift;
00322   foreach my $param (qw(dbname host port user pass)) {
00323     $self->{'_param'}{$param} = undef;
00324   }
00325 }
00326 
00327 =head2 confirm_params
00328 
00329   Example     : $support->confirm_params;
00330   Description : Prints a table of parameters that were collected from config
00331                 file and commandline and asks user to confirm if he wants
00332                 to proceed.
00333   Return type : true on success
00334   Exceptions  : none
00335   Caller      : general
00336 
00337 =cut
00338 
00339 sub confirm_params {
00340   my $self = shift;
00341 
00342   # print parameter table
00343   print "Running script with these parameters:\n\n";
00344   print $self->list_all_params;
00345 
00346   if ($self->param('host') eq 'ensdb-1-10') {
00347     # ask user if he wants to proceed
00348     exit unless $self->user_proceed("**************\n\n You're working on ensdb-1-10! Is that correct and you want to continue ?\n\n**************");
00349   }
00350   else {
00351     # ask user if he wants to proceed
00352     exit unless $self->user_proceed("Continue?");
00353   }
00354   return(1);
00355 }
00356 
00357 =head2 list_all_params
00358 
00359   Example     : print LOG $support->list_all_params;
00360   Description : prints a table of the parameters used in the script
00361   Return type : String - the table to print
00362   Exceptions  : none
00363   Caller      : general
00364 
00365 =cut
00366 
00367 sub list_all_params {
00368   my $self = shift;
00369   my $txt = sprintf "    %-21s%-40s\n", qw(PARAMETER VALUE);
00370   $txt .= "    " . "-"x71 . "\n";
00371   $Text::Wrap::colums = 72;
00372   my @params = $self->allowed_params;
00373   foreach my $key (@params) {
00374     my @vals = $self->param($key);
00375     if (@vals) {
00376       $txt .= Text::Wrap::wrap( sprintf('   %-21s', $key),
00377                 ' 'x24,
00378                 join(", ", @vals)
00379                   ) . "\n";
00380     }
00381   }
00382   $txt .= "\n";
00383   return $txt;
00384 }
00385 
00386 =head2 create_commandline_options
00387 
00388   Arg[1]      : Hashref $settings - hashref describing what to do
00389                 Allowed keys:
00390                     allowed_params => 0|1   # use all allowed parameters
00391                     exclude => []           # listref of parameters to exclude
00392                     replace => {param => newval} # replace value of param with
00393                                                  # newval
00394   Example     : $support->create_commandline_options({
00395                     allowed_params => 1,
00396                     exclude => ['verbose'],
00397                     replace => { 'dbname' => 'homo_sapiens_vega_33_35e' }
00398                 });
00399   Description : Creates a commandline options string that can be passed to any
00400                 other script using ConversionSupport.
00401   Return type : String - commandline options string
00402   Exceptions  : none
00403   Caller      : general
00404 
00405 =cut
00406 
00407 sub create_commandline_options {
00408   my ($self, $settings, $param_hash) = @_;
00409   my %param_hash = $param_hash ? %$param_hash : ();
00410 
00411   # get all allowed parameters
00412   if ($settings->{'allowed_params'}) {
00413     # exclude params explicitly stated
00414     my %exclude = map { $_ => 1 } @{ $settings->{'exclude'} || [] };
00415     foreach my $param ($self->allowed_params) {
00416       unless ($exclude{$param}) {
00417     my ($first, @rest) = $self->param($param);
00418     next unless (defined($first));
00419     
00420     if (@rest) {
00421       $first = join(",", $first, @rest);
00422     }
00423     $param_hash{$param} = $first;
00424       }
00425     }
00426   }
00427 
00428   # replace values
00429   foreach my $key (keys %{ $settings->{'replace'} || {} }) {
00430     $param_hash{$key} = $settings->{'replace'}->{$key};
00431   }
00432 
00433   # create the commandline options string
00434   my $options_string;
00435   foreach my $param (keys %param_hash) {
00436     $options_string .= sprintf("--%s %s ", $param, $param_hash{$param});
00437   }
00438   return $options_string;
00439 }
00440 
00441 =head2 check_required_params
00442 
00443   Arg[1-N]    : List @params - parameters to check
00444   Example     : $self->check_required_params(qw(dbname host port));
00445   Description : Checks $self->param to make sure the requested parameters
00446                 have been set. Dies if parameters are missing.
00447   Return type : true on success
00448   Exceptions  : none
00449   Caller      : general
00450 
00451 =cut
00452 
00453 sub check_required_params {
00454   my ($self, @params) = @_;
00455   my @missing = ();
00456   foreach my $param (@params) {
00457     push @missing, $param unless $self->param($param);
00458   }
00459   if (@missing) {
00460     throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
00461   }
00462   return(1);
00463 }
00464 
00465 =head2 user_proceed
00466 
00467   Arg[1]      : (optional) String $text - notification text to present to user
00468   Example     : # run a code snipped conditionally
00469                 if ($support->user_proceed("Run the next code snipped?")) {
00470                     # run some code
00471                 }
00472 
00473                 # exit if requested by user
00474                 exit unless ($support->user_proceed("Want to continue?"));
00475   Description : If running interactively, the user is asked if he wants to
00476                 perform a script action. If he doesn't, this section is skipped
00477                 and the script proceeds with the code. When running
00478                 non-interactively, the section is run by default.
00479   Return type : TRUE to proceed, FALSE to skip.
00480   Exceptions  : none
00481   Caller      : general
00482 
00483 =cut
00484 
00485 sub user_proceed {
00486   my ($self, $text) = @_;
00487 
00488   if ($self->param('interactive')) {
00489     print "$text\n" if $text;
00490     print "[y/N] ";
00491     my $input = lc(<>);
00492     chomp $input;
00493     unless ($input eq 'y') {
00494       print "Skipping.\n";
00495       return(0);
00496     }
00497   }
00498 
00499   return(1);
00500 }
00501 
00502 =head2 user_confirm
00503 
00504   Description : DEPRECATED - please use user_proceed() instead
00505 
00506 =cut
00507 
00508 sub user_confirm {
00509   my $self = shift;
00510   exit unless $self->user_proceed("Continue?");
00511 }
00512 
00513 =head2 read_user_input
00514 
00515   Arg[1]      : (optional) String $text - notification text to present to user
00516   Example     : my $ret = $support->read_user_input("Choose a number [1/2/3]");
00517                 if ($ret == 1) {
00518                     # do something
00519                 } elsif ($ret == 2) {
00520                     # do something else
00521                 }
00522   Description : If running interactively, the user is asked for input.
00523   Return type : String - user's input
00524   Exceptions  : none
00525   Caller      : general
00526 
00527 =cut
00528 
00529 sub read_user_input {
00530   my ($self, $text) = @_;
00531 
00532   if ($self->param('interactive')) {
00533     print "$text\n" if $text;
00534     my $input = <>;
00535     chomp $input;
00536     return $input;
00537   }
00538 }
00539 
00540 =head2 comma_to_list
00541 
00542   Arg[1-N]    : list of parameter names to parse
00543   Example     : $support->comma_to_list('chromosomes');
00544   Description : Transparently converts comma-separated lists into arrays (to
00545                 allow different styles of commandline options, see perldoc
00546                 Getopt::Long for details). Parameters are converted in place
00547                 (accessible through $self->param('name')).
00548   Return type : true on success
00549   Exceptions  : none
00550   Caller      : general
00551 
00552 =cut
00553 
00554 sub comma_to_list {
00555   my $self = shift;
00556   foreach my $param (@_) {
00557     $self->param($param,
00558          split (/,/, join (',', $self->param($param))));
00559   }
00560   return(1);
00561 }
00562 
00563 =head2 list_or_file
00564 
00565   Arg[1]      : Name of parameter to parse
00566   Example     : $support->list_or_file('gene');
00567   Description : Determines whether a parameter holds a list or it is a filename
00568                 to read the list entries from.
00569   Return type : true on success
00570   Exceptions  : thrown if list file can't be opened
00571   Caller      : general
00572 
00573 =cut
00574 
00575 sub list_or_file {
00576   my ($self, $param) = @_;
00577   my @vals = $self->param($param);
00578   return unless (@vals);
00579 
00580   my $firstval = $vals[0];
00581   if (scalar(@vals) == 1 && -e $firstval) {
00582     # we didn't get a list of values, but a file to read values from
00583     @vals = ();
00584     open(IN, $firstval) or throw("Cannot open $firstval for reading: $!");
00585     while(<IN>){
00586       chomp;
00587       push(@vals, $_);
00588     }
00589     close(IN);
00590     $self->param($param, @vals);
00591   }
00592   $self->comma_to_list($param);
00593   return(1);
00594 }
00595 
00596 =head2 param
00597 
00598   Arg[1]      : Parameter name
00599   Arg[2-N]    : (optional) List of values to set
00600   Example     : my $dbname = $support->param('dbname');
00601                 $support->param('port', 3306);
00602                 $support->param('chromosomes', 1, 6, 'X');
00603   Description : Getter/setter for parameters. Accepts single-value params and
00604                 list params.
00605   Return type : Scalar value for single-value parameters, array of values for
00606                 list parameters
00607   Exceptions  : thrown if no parameter name is supplied
00608   Caller      : general
00609 
00610 =cut
00611 
00612 sub param {
00613   my $self = shift;
00614   my $name = shift or throw("You must supply a parameter name");
00615 
00616   # setter
00617   if (@_) {
00618     if (scalar(@_) == 1) {
00619       # single value
00620       $self->{'_param'}->{$name} = shift;
00621     } else {
00622       # list of values
00623       undef $self->{'_param'}->{$name};
00624       @{ $self->{'_param'}->{$name} } = @_;
00625     }
00626   }
00627 
00628   # getter
00629   if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
00630     # list parameter
00631     return @{ $self->{'_param'}->{$name} };
00632   } elsif (defined($self->{'_param'}->{$name})) {
00633     # single-value parameter
00634     return $self->{'_param'}->{$name};
00635   } else {
00636     return ();
00637   }
00638 }
00639 
00640 =head2 error
00641 
00642   Arg[1]      : (optional) String - error message
00643   Example     : $support->error("An error occurred: $@");
00644                 exit(0) if $support->error;
00645   Description : Getter/setter for error messages
00646   Return type : String - error message
00647   Exceptions  : none
00648   Caller      : general
00649 
00650 =cut
00651 
00652 sub error {
00653   my $self = shift;
00654   $self->{'_error'} = shift if (@_);
00655   return $self->{'_error'};
00656 }
00657 
00658 =head2 warnings
00659 
00660   Example     : print LOG "There were ".$support->warnings." warnings.\n";
00661   Description : Returns the number of warnings encountered while running the
00662                 script (the warning counter is increased by $self->log_warning).
00663   Return type : Int - number of warnings
00664   Exceptions  : none
00665   Caller      : general
00666 
00667 =cut
00668 
00669 sub warnings {
00670   my $self = shift;
00671   return $self->{'_warnings'};
00672 }
00673 
00674 =head2 serverroot
00675 
00676   Arg[1]      : (optional) String - root directory of your ensembl sandbox
00677   Example     : my $serverroot = $support->serverroot;
00678   Description : Getter/setter for the root directory of your ensembl sandbox.
00679                 This is set when ConversionSupport object is created, so
00680                 usually only used as a getter.
00681   Return type : String - the server root directory
00682   Exceptions  : none
00683   Caller      : general
00684 
00685 =cut
00686 
00687 sub serverroot {
00688   my $self = shift;
00689   $self->{'_serverroot'} = shift if (@_);
00690   return $self->{'_serverroot'};
00691 }
00692 
00693 =head2 get_database
00694 
00695   Arg[1]      : String $database - the type of database to connect to
00696                 (eg core, otter)
00697   Arg[2]      : (optional) String $prefix - the prefix used for retrieving the
00698                 connection settings from the configuration
00699   Example     : my $db = $support->get_database('core');
00700   Description : Connects to the database specified.
00701   Return type : DBAdaptor of the appropriate type
00702   Exceptions  : thrown if asking for unknown database
00703   Caller      : general
00704 
00705 =cut
00706 
00707 sub get_database {
00708   my $self = shift;
00709   my $database = shift or throw("You must provide a database");
00710   my $prefix = shift || '';
00711   $self->check_required_params(
00712     "${prefix}host",
00713     "${prefix}port",
00714     "${prefix}user",
00715     "${prefix}dbname",
00716   );
00717   my %adaptors = (
00718     core    => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
00719     ensembl => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
00720     evega   => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
00721     otter   => 'Bio::Otter::DBSQL::DBAdaptor',
00722     vega    => 'Bio::Otter::DBSQL::DBAdaptor',
00723     compara => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
00724     loutre  => 'Bio::Vega::DBSQL::DBAdaptor',
00725   );
00726   throw("Unknown database: $database") unless $adaptors{$database};
00727 
00728   $self->dynamic_use($adaptors{$database});
00729   my $species = 'species' . $species_c;
00730   my $dba = $adaptors{$database}->new(
00731     -host    => $self->param("${prefix}host"),
00732     -port    => $self->param("${prefix}port"),
00733     -user    => $self->param("${prefix}user"),
00734     -pass    => $self->param("${prefix}pass") || '',
00735     -dbname  => $self->param("${prefix}dbname"),
00736     -group   => $database,
00737     -species => $species,
00738   );
00739   #can use this approach to get dna from another db
00740 #  my $dna_db = $adaptors{$database}->new(
00741 #    -host => 'otterlive',
00742 #    -port => '3301',
00743 #    -user => $self->param("${prefix}user"),
00744 #    -pass => $self->param("${prefix}pass"),
00745 #    -dbname => 'loutre_human',
00746 #  );
00747 #  $dba->dnadb($dna_db);
00748 
00749   # otherwise explicitely set the dnadb to itself - by default the Registry assumes
00750   # a group 'core' for this now
00751   $dba->dnadb($dba);
00752 
00753   $species_c++;
00754 
00755   $self->{'_dba'}->{$database} = $dba;
00756   $self->{'_dba'}->{'default'} = $dba unless $self->{'_dba'}->{'default'};
00757   return $self->{'_dba'}->{$database};
00758 }
00759 
00760 
00761 =head2 get_dbconnection
00762 
00763   Arg[1]      : (optional) String $prefix - the prefix used for retrieving the
00764                 connection settings from the configuration
00765   Example     : my $dbh = $self->get_dbconnection;
00766   Description : Connects to the database server specified. You don't have to
00767                 specify a database name (this is useful for running commands
00768                 like $dbh->do('show databases')).
00769   Return type : DBI database handle
00770   Exceptions  : thrown if connection fails
00771   Caller      : general
00772   Status      : At Risk
00773 
00774 =cut
00775 
00776 sub get_dbconnection {
00777   my $self = shift;
00778   my $prefix = shift;
00779  
00780   $self->check_required_params(
00781       "${prefix}host",
00782       "${prefix}port",
00783       "${prefix}user",
00784   );
00785 
00786   my $dsn = "DBI:" . ($self->param('driver')||'mysql') .
00787             ":host=" . $self->param("${prefix}host") .
00788             ";port=" . $self->param("${prefix}port");
00789 
00790   if ($self->param("${prefix}dbname")) {
00791     $dsn .= ";dbname=".$self->param("${prefix}dbname");
00792   }
00793 
00794 #  warn $dsn;
00795 
00796   my $dbh;
00797   eval{
00798     $dbh = DBI->connect($dsn, $self->param("${prefix}user"),
00799       $self->param("${prefix}pass"), {'RaiseError' => 1, 'PrintError' => 0});
00800   };
00801 
00802   if (!$dbh || $@ || !$dbh->ping) {
00803     $self->log_error("Could not connect to db server as user ".
00804       $self->param("${prefix}user") .
00805       " using [$dsn] as a locator:\n" . $DBI::errstr . $@);
00806   }
00807 
00808   $self->{'_dbh'} = $dbh;
00809   return $self->{'_dbh'};
00810 
00811 }
00812 
00813 
00814 =head2 dba
00815 
00816   Arg[1]      : (optional) String $database - type of db apaptor to retrieve
00817   Example     : my $dba = $support->dba;
00818   Description : Getter for database adaptor. Returns default (i.e. created
00819                 first) db adaptor if no argument is provided.
00820   Return type : Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::Otter::DBSQL::DBAdaptor
00821   Exceptions  : none
00822   Caller      : general
00823 
00824 =cut
00825 
00826 sub dba {
00827   my ($self, $database) = shift;
00828   return $self->{'_dba'}->{$database} || $self->{'_dba'}->{'default'};
00829 }
00830 
00831 =head2 dynamic_use
00832 
00833   Arg [1]    : String $classname - The name of the class to require/import
00834   Example    : $self->dynamic_use('Bio::EnsEMBL::DBSQL::DBAdaptor');
00835   Description: Requires and imports the methods for the classname provided,
00836                checks the symbol table so that it doesnot re-require modules
00837                that have already been required.
00838   Returntype : true on success
00839   Exceptions : Warns to standard error if module fails to compile
00840   Caller     : internal
00841 
00842 =cut
00843 
00844 sub dynamic_use {
00845   my ($self, $classname) = @_;
00846   my ($parent_namespace, $module) = $classname =~/^(.*::)(.*)$/ ? ($1,$2) : ('::', $classname);
00847 
00848   no strict 'refs';
00849   # return if module has already been imported
00850   return 1 if $parent_namespace->{$module.'::'} && %{ $parent_namespace->{$module.'::'}||{} };
00851 
00852   eval "require $classname";
00853   throw("Failed to require $classname: $@") if ($@);
00854   $classname->import();
00855 
00856   return 1;
00857 }
00858 
00859 =head2 get_chrlength
00860 
00861   Arg[1]      : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
00862   Arg[2]      : (optional) String $version - coord_system version
00863   Arg[3]      : (optional) String $type - type of region eg chromsome (defaults to 'toplevel')
00864   Arg[4]      : (optional) Boolean - return non reference slies as well (required for haplotypes eq 6-COX)
00865   Example     : my $chr_length = $support->get_chrlength($dba);
00866   Description : Get all chromosomes and their length from the database. Return
00867                 chr_name/length for the chromosomes the user requested (or all
00868                 chromosomes by default)
00869   Return type : Hashref - chromosome_name => length
00870   Exceptions  : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
00871   Caller      : general
00872 
00873 =cut
00874 
00875 sub get_chrlength {
00876   my ($self, $dba, $version,$type,$include_non_reference) = @_;
00877   $dba  ||= $self->dba;
00878   $type ||= 'toplevel';
00879   throw("get_chrlength should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n")
00880     unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
00881 
00882   my $sa = $dba->get_SliceAdaptor;
00883 
00884   my @chromosomes = map { $_->seq_region_name } 
00885     @{ $sa->fetch_all($type,$version,$include_non_reference) };
00886   my %chr = map { $_ => $sa->fetch_by_region($type, $_, undef, undef, undef, $version)->length } @chromosomes;
00887 
00888   my @wanted = $self->param('chromosomes');
00889   if (@wanted) {
00890     # check if user supplied invalid chromosome names
00891     foreach my $chr (@wanted) {
00892       my $found = 0;
00893       foreach my $chr_from_db (keys %chr) {
00894     if ($chr_from_db eq $chr) {
00895       $found = 1;
00896       last;
00897     }
00898       }
00899       unless ($found) {
00900     warning("Didn't find chromosome $chr in database " .
00901           $self->param('dbname'));
00902       }
00903     }
00904 
00905     # filter to requested chromosomes only
00906   HASH:
00907     foreach my $chr_from_db (keys %chr) {
00908       foreach my $chr (@wanted) {
00909     if ($chr_from_db eq $chr) {
00910       next HASH;
00911     }
00912       }
00913       delete($chr{$chr_from_db});
00914     }
00915   }
00916 
00917   return \%chr;
00918 }
00919 
00920 =head2 get_ensembl_chr_mapping
00921 
00922   Arg[1]      : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
00923   Arg[2]      : (optional) String $version - coord_system version
00924   Example     : my $ensembl_mapping = $support->get_ensembl_chr_mapping($dba);
00925   Description : Gets a mapping between Vega chromosome names and their
00926                 equivalent Ensembl chromosomes. Works with non-reference chromosomes
00927   Return type : Hashref - Vega name => Ensembl name
00928   Exceptions  : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
00929   Caller      : general
00930 
00931 =cut
00932 
00933 sub get_ensembl_chr_mapping {
00934   my ($self, $dba, $version) = @_;
00935   $dba ||= $self->dba;
00936   throw("get_ensembl_chr_mapping should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n") unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
00937 
00938   my $sa = $dba->get_SliceAdaptor;
00939   my @chromosomes = map { $_->seq_region_name } 
00940     @{ $sa->fetch_all('chromosome', $version, 1) };
00941 
00942   my %chrs;
00943   foreach my $chr (@chromosomes) {
00944     my $sr = $sa->fetch_by_region('chromosome', $chr, undef, undef, undef, $version);
00945     my ($ensembl_name_attr) = @{ $sr->get_all_Attributes('ensembl_name') };
00946     if ($ensembl_name_attr) {
00947       $chrs{$chr} = $ensembl_name_attr->value;
00948     } else {
00949       $chrs{$chr} = $chr;
00950     }
00951   }
00952   return \%chrs;
00953 }
00954 
00955 =head2 get_taxonomy_id
00956 
00957   Arg[1]      : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
00958   Example     : my $sid = $support->get_taxonony_id($dba);
00959   Description : Retrieves the taxononmy ID from the meta table
00960   Return type : Int - the taxonomy ID
00961   Exceptions  : thrown if no taxonomy ID is found in the database
00962   Caller      : general
00963 
00964 =cut
00965 
00966 sub get_taxonomy_id {
00967   my ($self, $dba) = @_;
00968   $dba ||= $self->dba;
00969   my $sql = 'SELECT meta_value FROM meta WHERE meta_key = "species.taxonomy_id"';
00970   my $sth = $dba->dbc->db_handle->prepare($sql);
00971   $sth->execute;
00972   my ($tid) = $sth->fetchrow_array;
00973   $sth->finish;
00974   $self->throw("Could not determine taxonomy_id from database.") unless $tid;
00975   return $tid;
00976 }
00977 
00978 =head2 get_species_scientific_name
00979 
00980   Arg[1]      : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
00981   Example     : my $species = $support->get_species_scientific_name($dba);
00982   Description : Retrieves the species scientific name (Genus species) from the
00983                 meta table
00984   Return type : String - species scientific name
00985   Exceptions  : thrown if species name can not be determined from db
00986   Caller      : general
00987 
00988 =cut
00989 
00990 sub get_species_scientific_name {
00991   my ($self, $dba) = @_;
00992   $dba ||= $self->dba;
00993   my $sql_tmp = "SELECT meta_value FROM meta WHERE meta_key = \'species.classification\' ORDER BY meta_id";
00994   my $sql = $dba->dbc->add_limit_clause($sql_tmp,2);
00995   my $sth = $dba->dbc->db_handle->prepare($sql);
00996   $sth->execute;
00997   my @sp;
00998   while (my @row = $sth->fetchrow_array) {
00999     push @sp, $row[0];
01000   }
01001   $sth->finish;
01002   my $species = join(" ", reverse @sp);
01003   $self->throw("Could not determine species scientific name from database.")
01004     unless $species;
01005   return $species;
01006 }
01007 
01008 =head2 species
01009 
01010   Arg[1]      : (optional) String $species - species name to set
01011   Example     : my $species = $support->species;
01012                 my $url = "http://vega.sanger.ac.uk/$species/";
01013   Description : Getter/setter for species name (Genus_species). If not set, it's
01014                 determined from database's meta table
01015   Return type : String - species name
01016   Exceptions  : none
01017   Caller      : general
01018 
01019 =cut
01020 
01021 sub species {
01022   my $self = shift;
01023   $self->{'_species'} = shift if (@_);
01024   # get species name from database if not set
01025   unless ($self->{'_species'}) {
01026     $self->{'_species'} = join('_',
01027                    split(/ /, $self->get_species_scientific_name));
01028   }
01029   return $self->{'_species'};
01030 }
01031 
01032 =head2 sort_chromosomes
01033 
01034   Arg[1]      : (optional) Hashref $chr_hashref - Hashref with chr_name as keys
01035   Example     : my $chr = { '6-COX' => 1, '1' => 1, 'X' => 1 };
01036                 my @sorted = $support->sort_chromosomes($chr);
01037   Description : Sorts chromosomes in an intuitive way (numerically, then
01038                 alphabetically). If no chromosome hashref is passed, it's
01039                 retrieve by calling $self->get_chrlength()
01040   Return type : List - sorted chromosome names
01041   Exceptions  : thrown if no hashref is provided
01042   Caller      : general
01043 
01044 =cut
01045 
01046 sub sort_chromosomes {
01047   my ($self, $chr_hashref) = @_;
01048   $chr_hashref = $self->get_chrlength unless ($chr_hashref);
01049   throw("You have to pass a hashref of your chromosomes")
01050     unless ($chr_hashref and ref($chr_hashref) eq 'HASH');
01051   return (sort _by_chr_num keys %$chr_hashref);
01052 }
01053 
01054 =head2 _by_chr_num
01055 
01056   Example     : my @sorted = sort _by_chr_num qw(X, 6-COX, 14, 7);
01057   Description : Subroutine to use in sort for sorting chromosomes. Sorts
01058                 numerically, then alphabetically
01059   Return type : values to be used by sort
01060   Exceptions  : none
01061   Caller      : internal ($self->sort_chromosomes)
01062 
01063 =cut
01064 
01065 sub _by_chr_num {
01066   my @awords = split /-/, $a;
01067   my @bwords = split /-/, $b;
01068 
01069   my $anum = $awords[0];
01070   my $bnum = $bwords[0];
01071 
01072   if ($anum !~ /^[0-9]*$/) {
01073     if ($bnum !~ /^[0-9]*$/) {
01074       return $anum cmp $bnum;
01075     } else {
01076       return 1;
01077     }
01078   }
01079   if ($bnum !~ /^[0-9]*$/) {
01080     return -1;
01081   }
01082 
01083   if ($anum <=> $bnum) {
01084     return $anum <=> $bnum;
01085   } else {
01086     if ($#awords == 0) {
01087       return -1;
01088     } elsif ($#bwords == 0) {
01089       return 1;
01090     } else {
01091       return $awords[1] cmp $bwords[1];
01092     }
01093   }
01094 }
01095 
01096 =head2 split_chromosomes_by_size
01097 
01098   Arg[1]      : (optional) Int $cutoff - the cutoff in bp between small and
01099                 large chromosomes
01100   Arg[2]      : (optional) Boolean to include duplicate regions, ie PAR or not
01101                 (default is no)
01102   Arg[3]      : (optional) Coordsystem version to retrieve
01103 
01104   Example     : my $chr_slices = $support->split_chromosomes_by_size;
01105                 foreach my $block_size (keys %{ $chr_slices }) {
01106                     print "Chromosomes with blocksize $block_size: ";
01107                     print join(", ", map { $_->seq_region_name }
01108                                         @{ $chr_slices->{$block_size} });
01109                 }
01110   Description : Determines block sizes for storing DensityFeatures on
01111                 chromosomes, and return slices for each chromosome. The block
01112                 size is determined so that you have 150 bins for the smallest
01113                 chromosome over 5 Mb in length. For chromosomes smaller than 5
01114                 Mb, an additional smaller block size is used to yield 150 bins
01115                 for the overall smallest chromosome. This will result in
01116                 reasonable resolution for small chromosomes and high
01117                 performance for big ones. Does not return non-reference seq_regions
01118   Return type : Hashref (key: block size; value: Arrayref of chromosome
01119                 Bio::EnsEMBL::Slices)
01120   Exceptions  : none
01121   Caller      : density scripts
01122 
01123 =cut
01124 
01125 sub split_chromosomes_by_size {
01126   my $self   = shift;
01127   my $cutoff = shift || 5000000;
01128   my $dup    = shift || 0;
01129   my $cs_version = shift;
01130   my $slice_adaptor = $self->dba->get_SliceAdaptor;
01131   my $top_slices;
01132   if ($self->param('chromosomes')) {
01133     foreach my $chr ($self->param('chromosomes')) {
01134       push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr);
01135     }
01136   } else {
01137     $top_slices = $slice_adaptor->fetch_all('chromosome',$cs_version,0,$dup);
01138   }
01139 
01140   my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);
01141   foreach my $slice (@{ $top_slices }) {
01142     next if ($slice->length eq 10000); #hack for chrY pseudoslice
01143     if ($slice->length < $cutoff) {
01144       if (! $min_small_chr or ($min_small_chr > $slice->length)) {
01145     $min_small_chr = $slice->length;
01146       }
01147       # push small chromosomes onto $small_chr
01148       push @{ $small_chr }, $slice;
01149     }
01150     elsif (! $min_big_chr or ($min_big_chr > $slice->length) ){
01151       $min_big_chr = $slice->length;
01152     }
01153     # push _all_ chromosomes onto $big_chr
01154     push @{ $big_chr }, $slice;
01155   }
01156   my $chr_slices;
01157   $chr_slices->{int($min_big_chr/150)}   = $big_chr if $min_big_chr;
01158   $chr_slices->{int($min_small_chr/150)} = $small_chr if $min_small_chr;
01159   return $chr_slices;
01160 }
01161 
01162 =head2 log
01163 
01164   Arg[1]      : String $txt - the text to log
01165   Arg[2]      : Int $indent - indentation level for log message
01166   Example     : my $log = $support->log_filehandle;
01167                 $support->log('Log foo.\n', 1);
01168   Description : Logs a message to the filehandle initialised by calling
01169                 $self->log_filehandle(). You can supply an indentation level
01170                 to get nice hierarchical log messages.
01171   Return type : true on success
01172   Exceptions  : thrown when no filehandle can be obtained
01173   Caller      : general
01174 
01175 =cut
01176 
01177 sub log {
01178   my ($self, $txt, $indent) = @_;
01179   $indent ||= 0;
01180 
01181   # strip off leading linebreaks so that indenting doesn't break
01182   $txt =~ s/^(\n*)//;
01183 
01184   $txt = $1."    "x$indent . $txt;
01185   my $fh = $self->{'_log_filehandle'};
01186   throw("Unable to obtain log filehandle") unless $fh;
01187   print $fh "$txt";
01188   return(1);
01189 }
01190 
01191 =head2 log_warning
01192 
01193   Arg[1]      : String $txt - the warning text to log
01194   Arg[2]      : Int $indent - indentation level for log message
01195   Arg[3]      : Bool - add a line break before warning if true
01196   Example     : my $log = $support->log_filehandle;
01197                 $support->log_warning('Log foo.\n', 1);
01198   Description : Logs a message via $self->log and increases the warning counter.
01199   Return type : true on success
01200   Exceptions  : none
01201   Caller      : general
01202 
01203 =cut
01204 
01205 sub log_warning {
01206   my ($self, $txt, $indent, $break) = @_;
01207   $txt = "WARNING: " . $txt;
01208   $txt = "\n$txt" if ($break);
01209   $self->log($txt, $indent);
01210   $self->{'_warnings'}++;
01211   return(1);
01212 }
01213 
01214 =head2 log_error
01215 
01216   Arg[1]      : String $txt - the error text to log
01217   Arg[2]      : Int $indent - indentation level for log message
01218   Example     : my $log = $support->log_filehandle;
01219                 $support->log_error('Log foo.\n', 1);
01220   Description : Logs a message via $self->log and exits the script.
01221   Return type : none
01222   Exceptions  : none
01223   Caller      : general
01224 
01225 =cut
01226 
01227 sub log_error {
01228   my ($self, $txt, $indent) = @_;
01229   $txt = "ERROR: ".$txt;
01230   $self->log($txt, $indent);
01231   $self->log("Exiting.\n");
01232   exit;
01233 }
01234 
01235 =head2 log_verbose
01236 
01237   Arg[1]      : String $txt - the warning text to log
01238   Arg[2]      : Int $indent - indentation level for log message
01239   Example     : my $log = $support->log_filehandle;
01240                 $support->log_verbose('Log this verbose message.\n', 1);
01241   Description : Logs a message via $self->log if --verbose option was used
01242   Return type : TRUE on success, FALSE if not verbose
01243   Exceptions  : none
01244   Caller      : general
01245 
01246 =cut
01247 
01248 sub log_verbose {
01249   my ($self, $txt, $indent) = @_;
01250   return(0) unless $self->param('verbose');
01251   $self->log($txt, $indent);
01252   return(1);
01253 }
01254 
01255 =head2 log_stamped
01256 
01257   Arg[1]      : String $txt - the warning text to log
01258   Arg[2]      : Int $indent - indentation level for log message
01259   Example     : my $log = $support->log_filehandle;
01260                 $support->log_stamped('Log this stamped message.\n', 1);
01261   Description : Appends timestamp and memory usage to a message and logs it via
01262                 $self->log
01263   Return type : TRUE on success
01264   Exceptions  : none
01265   Caller      : general
01266 
01267 =cut
01268 
01269 sub log_stamped {
01270   my ($self, $txt, $indent) = @_;
01271   # append timestamp and memory usage to log text
01272   $txt =~ s/(\n*)$//;
01273   $txt .= " ".$self->date_and_mem.$1;
01274   $self->log($txt, $indent);
01275   return(1);
01276 }
01277 
01278 =head2 log_filehandle
01279 
01280   Arg[1]      : (optional) String $mode - file access mode
01281   Example     : my $log = $support->log_filehandle;
01282                 # print to the filehandle
01283                 print $log 'Lets start logging...\n';
01284                 # log via the wrapper $self->log()
01285                 $support->log('Another log message.\n');
01286   Description : Returns a filehandle for logging (STDERR by default, logfile if
01287                 set from config or commandline). You can use the filehandle
01288                 directly to print to, or use the smart wrapper $self->log().
01289                 Logging mode (truncate or append) can be set by passing the
01290                 mode as an argument to log_filehandle(), or with the
01291                 --logappend commandline option (default: truncate)
01292   Return type : Filehandle - the filehandle to log to
01293   Exceptions  : thrown if logfile can't be opened
01294   Caller      : general
01295 
01296 =cut
01297 
01298 sub log_filehandle {
01299   my ($self, $mode) = @_;
01300   $mode ||= '>';
01301   $mode = '>>' if ($self->param('logappend'));
01302   my $fh = \*STDERR;
01303   if (my $logfile = $self->param('logfile')) {
01304     if (my $logpath = $self->param('logpath')) {
01305       unless (-e $logpath) {
01306     system("mkdir $logpath") == 0 or
01307       $self->log_error("Can't create log dir $logpath: $!\n");
01308       }
01309       $logfile = "$logpath/$logfile";
01310     }
01311     open($fh, "$mode", $logfile) or throw(
01312       "Unable to open $logfile for writing: $!");
01313   }
01314   $self->{'_log_filehandle'} = $fh;
01315   return $self->{'_log_filehandle'};
01316 }
01317 
01318 =head2 filehandle
01319 
01320   Arg[1]      : String $mode - file access mode
01321   Arg[2]      : String $file - input or output file
01322   Example     : my $fh = $support->filehandle('>>', '/path/to/file');
01323                 # print to the filehandle
01324                 print $fh 'Your text goes here...\n';
01325   Description : Returns a filehandle (*STDOUT for writing, *STDIN for reading
01326                 by default) to print to or read from.
01327   Return type : Filehandle - the filehandle
01328   Exceptions  : thrown if file can't be opened
01329   Caller      : general
01330 
01331 =cut
01332 
01333 sub filehandle {
01334   my ($self, $mode, $file) = @_;
01335   $mode ||= ">";
01336   my $fh;
01337   if ($file) {
01338     open($fh, "$mode", $file) or throw(
01339       "Unable to open $file for writing: $!");
01340   } elsif ($mode =~ />/) {
01341     $fh = \*STDOUT;
01342   } elsif ($mode =~ /</) {
01343     $fh = \*STDIN;
01344   }
01345   return $fh;
01346 }
01347 
01348 =head2 init_log
01349 
01350   Example     : $support->init_log;
01351   Description : Opens a filehandle to the logfile and prints some header
01352                 information to this file. This includes script name, date, user
01353                 running the script and parameters the script will be running
01354                 with.
01355   Return type : Filehandle - the log filehandle
01356   Exceptions  : none
01357   Caller      : general
01358 
01359 =cut
01360 
01361 sub init_log {
01362   my $self = shift;
01363 
01364   # get a log filehandle
01365   my $log = $self->log_filehandle;
01366 
01367   # print script name, date, user who is running it
01368   my $hostname = `hostname`;
01369   chomp $hostname;
01370   my $script = "$hostname:$Bin/$Script";
01371   my $user = `whoami`;
01372   chomp $user;
01373   $self->log("Script: $script\nDate: ".$self->date."\nUser: $user\n");
01374 
01375   # print parameters the script is running with
01376   $self->log("Parameters:\n\n");
01377   $self->log($self->list_all_params);
01378 
01379   # remember start time
01380   $self->{'_start_time'} = time;
01381 
01382   return $log;
01383 }
01384 
01385 =head2 finish_log
01386 
01387   Example     : $support->finish_log;
01388   Description : Writes footer information to a logfile. This includes the
01389                 number of logged warnings, timestamp and memory footprint.
01390   Return type : TRUE on success
01391   Exceptions  : none
01392   Caller      : general
01393 
01394 =cut
01395 
01396 sub finish_log {
01397   my $self = shift;
01398   $self->log("\nAll done. ".$self->warnings." warnings. ");
01399   if ($self->{'_start_time'}) {
01400     $self->log("Runtime ");
01401     my $diff = time - $self->{'_start_time'};
01402     my $sec = $diff % 60;
01403     $diff = ($diff - $sec) / 60;
01404     my $min = $diff % 60;
01405     my $hours = ($diff - $min) / 60;
01406     $self->log("${hours}h ${min}min ${sec}sec ");
01407   }
01408   $self->log($self->date_and_mem."\n\n");
01409   return(1);
01410 }
01411 
01412 =head2 date_and_mem
01413 
01414   Example     : print LOG "Time, memory usage: ".$support->date_and_mem."\n";
01415   Description : Prints a timestamp and the memory usage of your script.
01416   Return type : String - timestamp and memory usage
01417   Exceptions  : none
01418   Caller      : general
01419 
01420 =cut
01421 
01422 sub date_and_mem {
01423   my $date = strftime "%Y-%m-%d %T", localtime;
01424   my $mem = `ps -p $$ -o vsz |tail -1`;
01425   chomp $mem;
01426   return "[$date, mem $mem]";
01427 }
01428 
01429 =head2 date
01430 
01431   Example     : print "Date: " . $support->date . "\n";
01432   Description : Prints a nicely formatted timestamp (YYYY-DD-MM hh:mm:ss)
01433   Return type : String - the timestamp
01434   Exceptions  : none
01435   Caller      : general
01436 
01437 =cut
01438 
01439 sub date {
01440   return strftime "%Y-%m-%d %T", localtime;
01441 }
01442 
01443 =head2 format_time
01444 
01445   Example     : print $support->format_time($gene->modifed_date) . "\n";
01446   Description : Prints timestamps from the database
01447   Return type : String - nicely formatted time stamp
01448   Exceptions  : none
01449   Caller      : general
01450 
01451 =cut
01452 
01453 
01454 sub date_format {
01455   my( $self, $time, $format ) = @_;
01456   my( $d,$m,$y) = (localtime($time))[3,4,5];
01457   my %S = ('d'=>sprintf('%02d',$d),'m'=>sprintf('%02d',$m+1),'y'=>$y+1900);
01458   (my $res = $format ) =~s/%(\w)/$S{$1}/ge;
01459   return $res;
01460 }
01461 
01462 
01463 =head2 mem
01464 
01465   Example     : print "Memory usage: " . $support->mem . "\n";
01466   Description : Prints the memory used by your script. Not sure about platform
01467                 dependence of this call ...
01468   Return type : String - memory usage
01469   Exceptions  : none
01470   Caller      : general
01471 
01472 =cut
01473 
01474 sub mem {
01475   my $mem = `ps -p $$ -o vsz |tail -1`;
01476   chomp $mem;
01477   return $mem;
01478 }
01479 
01480 =head2 commify
01481 
01482   Arg[1]      : Int $num - a number to commify
01483   Example     : print "An easy to read number: ".$self->commify(100000000);
01484                 # will print 100,000,000
01485   Description : put commas into a number to make it easier to read
01486   Return type : a string representing the commified number
01487   Exceptions  : none
01488   Caller      : general
01489   Status      : stable
01490 
01491 =cut
01492 
01493 sub commify {
01494   my $self = shift;
01495   my $num = shift;
01496 
01497   $num = reverse($num);
01498   $num =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
01499 
01500   return scalar reverse $num;
01501 }
01502 
01503 =head2 fetch_non_hidden_slices
01504 
01505   Arg[1]      : B::E::SliceAdaptor
01506   Arg[2]      : B::E::AttributeAdaptor
01507   Arg[3]      : string $coord_system_name (optional) - 'chromosome' by default
01508   Arg[4]      : string $coord_system_version (optional) - 'otter' by default
01509   Example     : $chroms = $support->fetch_non_hidden_slice($sa,$aa);
01510   Description : retrieve all slices from a loutre database that don't have a hidden attribute.
01511                 Doesn't retrieve non-reference slices
01512   Return type : arrayref
01513   Caller      : general
01514   Status      : stable
01515 
01516 =cut
01517 
01518 sub fetch_non_hidden_slices {
01519   my $self = shift;
01520   my $aa   = shift or throw("You must supply an attribute adaptor");
01521   my $sa   = shift or throw("You must supply a slice adaptor");
01522   my $cs   = shift || 'chromosome';
01523   my $cv   = shift || 'Otter';
01524   my $visible_chroms;
01525   foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
01526     my $chrom_name = $chrom->name;
01527     my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
01528     if ( scalar(@$attribs) > 1 ) {
01529       $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n");
01530     }
01531     elsif ($attribs->[0]->value == 0) {             
01532       push @$visible_chroms, $chrom;
01533     }
01534     elsif ($attribs->[0]->value == 1) { 
01535       $self->log_verbose("chromosome $chrom_name is hidden\n"); 
01536     }
01537     else {
01538       $self->log_warning("No hidden attribute for chromosome $chrom_name\n");
01539     }
01540   }
01541   return $visible_chroms;
01542 }
01543 
01544 =head2 get_non_hidden_slice_names
01545 
01546   Arg[1]      : B::E::SliceAdaptor
01547   Arg[2]      : B::E::AttributeAdaptor
01548   Arg[3]      : string $coord_system_name (optional) - 'chromosome' by default
01549   Arg[4]      : string $coord_system_version (optional) - 'otter' by default
01550   Example     : $chrom_names = $support->get_non_hidden_slice_names($sa,$aa);
01551   Description : retrieve names of all slices from a loutre database that don't have a hidden attribute.
01552                 Doesn't retrieve non-reference slices
01553   Return type : arrayref of names of all non-hidden slices
01554   Caller      : general
01555   Status      : stable
01556 
01557 =cut
01558 
01559 sub get_non_hidden_slice_names {
01560   my $self = shift;
01561   my $aa   = shift or throw("You must supply an attribute adaptor");
01562   my $sa   = shift or throw("You must supply a slice adaptor");
01563   my $cs   = shift || 'chromosome';
01564   my $cv   = shift || 'Otter';
01565   my $visible_chrom_names;
01566   foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
01567     my $chrom_name = $chrom->seq_region_name;
01568     my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
01569     if ( scalar(@$attribs) > 1 ) {
01570       $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n");
01571     }
01572     elsif ($attribs->[0]->value == 0) {             
01573       push @$visible_chrom_names, $chrom_name;
01574     }
01575     elsif ($attribs->[0]->value == 1) { 
01576       $self->log_verbose("chromosome $chrom_name is hidden\n"); 
01577     }
01578     else {
01579       $self->log_warning("No hidden attribute for chromosome $chrom_name\n");
01580     }
01581   }
01582   return $visible_chrom_names;
01583 }
01584 
01585 
01586 =head2 get_wanted_chromosomes
01587 
01588   Arg[1]      : B::E::SliceAdaptor
01589   Arg[2]      : B::E::AttributeAdaptor
01590   Arg[3]      : string $coord_system_name (optional) - 'chromosome' by default
01591   Arg[4]      : string $coord_system_version (optional) - 'otter' by default
01592   Example     : $chr_names = $support->get_wanted_chromosomes($laa,$lsa);
01593   Description : retrieve names of slices from a lutra database that are ready for dumping to Vega.
01594                 Deals with list of names to ignore (ignore_chr = LIST)
01595   Return type : arrayref of slices
01596   Caller      : general
01597   Status      : stable
01598 
01599 =cut
01600 
01601 sub get_wanted_chromosomes {
01602   my $self = shift;
01603   my $aa   = shift or throw("You must supply an attribute adaptor");
01604   my $sa   = shift or throw("You must supply a slice adaptor");
01605   my $cs   = shift || 'chromosome';
01606   my $cv   = shift || 'Otter';
01607   my $export_mode = $self->param('release_type');
01608   my $release = $self->param('vega_release');
01609   my $names;
01610   my $chroms  = $self->fetch_non_hidden_slices($aa,$sa,$cs,$cv);
01611  CHROM:
01612   foreach my $chrom (@$chroms) {
01613     my $attribs = $aa->fetch_all_by_Slice($chrom);
01614     my $vals = $self->get_attrib_values($attribs,'vega_export_mod');
01615     if (scalar(@$vals > 1)) {
01616       $self->log_warning ("Multiple attribs for \'vega_export_mod\', please fix before continuing");
01617       exit;
01618     }
01619     next CHROM if (! grep { $_ eq $export_mode} @$vals);
01620     $vals =  $self->get_attrib_values($attribs,'vega_release',$release);    
01621     if (scalar(@$vals > 1)) {
01622       $self->log_warning ("Multiple attribs for \'vega_release\' value = $release , please fix before continuing");
01623       exit;
01624     }
01625     next CHROM if (! grep { $_ eq $release} @$vals);
01626     my $name = $chrom->seq_region_name;
01627     if (my @ignored = $self->param('ignore_chr')) {
01628       next CHROM if (grep {$_ eq $name} @ignored);
01629     }
01630     push @{$names}, $name;
01631   }
01632   return $names;
01633 }
01634 
01635 =head2 get_unique_genes
01636 
01637   Arg[1]      : B::E::Slice
01638   Arg[2]      : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!)
01639   Example     : $genes = $support->get_unique_genes($slice,$dba);
01640   Description : Retrieve genes that are only on the slice itself - used for human where assembly patches
01641                 are in the assembly_exception table. Needs the PATCHes to have 'non_ref' seq_region_attributes.
01642   Return type : arrayref of genes
01643   Caller      : general
01644   Status      : stable
01645 
01646 =cut
01647 
01648 sub get_unique_genes {
01649   my $self  = shift;
01650   my ($slice,$dba) = @_;
01651   $slice or throw("You must supply a slice");
01652   $dba ||= $self->dba;
01653 
01654   my $sa    = $dba->get_adaptor('Slice');
01655   my $ga    = $dba->get_adaptor('Gene');
01656   my $patch = 0;
01657   my $genes = [];
01658   if ( ! $slice->is_reference() ) {
01659 #  if ( 0 ) {
01660     $patch = 1;
01661     my $slices = $sa->fetch_by_region_unique( $slice->coord_system_name(),$slice->seq_region_name() );
01662     foreach my $slice ( @{$slices} ) {
01663       push @$genes,@{$ga->fetch_all_by_Slice($slice)};
01664       #      my $start = $slice->start;
01665     }
01666   }
01667   else {
01668     $genes = $ga->fetch_all_by_Slice($slice);
01669   }
01670   return ($genes, $patch);
01671 }
01672 
01673 
01674 
01675 =head2 get_attrib_values
01676 
01677   Arg[1]      : Arrayref of B::E::Attributes
01678   Arg[2]      : 'code' to search for
01679   Arg[3]      : 'value' to search for (optional)
01680   Example     : my $c = $self->get_attrib_values($attribs,'name'));
01681   Description : (i) In the absence of an attribute value argument, examines an arrayref
01682                 of B::E::Attributes for a particular attribute type, returning the values
01683                 for each attribute of that type. Can therefore be used to test for the
01684                 number of attributes of that type.
01685                 (ii) In the presence of the optional value argument it returns all
01686                 attributes with that value ie can be used to test for the presence of an
01687                 attribute with that particular value.
01688   Return type : arrayref of values for that attribute
01689   Caller      : general
01690   Status      : stable
01691 
01692 =cut
01693 
01694 sub get_attrib_values {
01695   my $self    = shift;
01696   my $attribs = shift;
01697   my $code    = shift;
01698   my $value   = shift;
01699   if (my @atts = grep {$_->code eq $code } @$attribs) {
01700     my $r = [];
01701     if ($value) {
01702       if (my @values = grep {$_->value eq $value} @atts) {
01703     foreach (@values) {
01704       push @$r, $_->value;
01705     }
01706     return $r;
01707       }
01708       else {
01709     return [];
01710       }
01711     }
01712     else {
01713       foreach (@atts) {
01714     push @$r, $_->value;
01715       }
01716       return $r;
01717     }
01718   }
01719   else {
01720     return [];
01721   }
01722 }
01723 
01724 =head2 fix_attrib_value
01725 
01726   Arg[1]      : Arrayref of existing B::E::Attributes
01727   Arg[2]      : dbID of object
01728   Arg[3]      : name of object (just for reporting)
01729   Arg[4]      : attrib_type.code
01730   Arg[5]      : attrib_type.value
01731   Arg[6]      : interactive ? (0 by default)
01732   Arg[7]      : table
01733   Example     : $support->fix_attrib_value($attribs,$chr_id,$chr_name,'vega_export_mod','N',1);
01734   Description : adds a new attribute to an object, or updates an existing attribute with a new value
01735                 Can be run in interactive or non-interactive mode (default)
01736   Return type : arrayref of results
01737   Caller      : general
01738   Status      : only ever tested with seq_region_attributes to date
01739 
01740 =cut
01741 
01742 sub fix_attrib_value {
01743   my $self        = shift;
01744   my $attribs     = shift;
01745   my $id          = shift;
01746   my $name        = shift;
01747   my $code        = shift;
01748   my $value       = shift;
01749   my $interact    = shift || 0;
01750   my $table       = shift || 'seq_region_attrib';
01751 
01752   #transiently set interactive parameter to zero
01753   my $int_before;
01754   if (! $interact) {
01755     $int_before = $self->param('interactive');
01756     $self->param('interactive',0);
01757   }
01758 
01759   #get any existing value(s) for this attribute
01760   my $existings = $self->get_attrib_values($attribs,$code);
01761     
01762   #add a new attribute if there is none...
01763   if (! @$existings ) {
01764     if ($self->user_proceed("Do you want to set $name attrib (code = $code) to value $value ?")) {
01765       my $r = $self->store_new_attribute($id,$code,$value);
01766 
01767       #reset interactive parameter
01768       $self->param('interactive',$int_before) if (! $interact);
01769       return $r;
01770         }
01771   }
01772   #...warn and exit if you're trying to update more than one value for the same attribute...
01773   elsif (scalar @$existings > 1) {
01774     $self->log_warning("You shouldn't be trying to update multiple attributes with the same code at once ($name:$code,$value), looks like you have duplicate entries in the (seq_region_)attrib table\n");
01775     exit;
01776   }
01777 
01778   #...or update an attribute with new values...
01779   else {
01780     my $existing = $existings->[0];
01781     if ($existing ne $value) {
01782       if ($self->user_proceed("Do you want to reset $name attrib (code = $code) from $existing to $value ?")) {
01783     my $r = $self->update_attribute($id,$code,$value);
01784     $self->param('interactive',$int_before) if (! $interact);
01785     push @$r, $existing;
01786     return $r;
01787       }
01788     }
01789     #...or make no change
01790     else {
01791       $self->param('interactive',$int_before) if (! $interact);
01792       return [];
01793     }
01794   }
01795 }
01796 
01797 =head2 _get_attrib_id
01798 
01799   Arg[1]      : attrib_type.code 
01800   Arg[2]      : database handle 
01801   Example     : $self->_get_attrib_id('name',$dbh)
01802   Description : get attrib_type.attrib_type_id from a attrib_type.code
01803   Return type : attrib_type.attrib_type_id 
01804   Caller      : internal
01805   Status      : stable
01806 
01807 =cut
01808 
01809 sub _get_attrib_id {
01810   my $self        = shift;
01811   my $attrib_code = shift;
01812   my $dbh         = shift;
01813   my ($attrib_id) = $dbh->selectrow_array(
01814     qq(select attrib_type_id
01815            from attrib_type
01816            where code = ?),
01817     {},
01818     ($attrib_code)
01819   );
01820   if (! $attrib_id) {
01821     $self->log_warning("There is no attrib_type_id for code $attrib_code, please patch the attrib_table\n");
01822     exit;
01823   }
01824   else {
01825     return $attrib_id;
01826   }
01827 }
01828 
01829 =head2 store_new_attribute
01830 
01831   Arg[1]      : seq_region.seq_region_id
01832   Arg[2]      : attrib_type.code
01833   Arg[3]      : attrib_type.value
01834   ARG[4]      : table to update (seq_region_attribute by default)
01835   Example     : $support->store_new_attribute(23,name,5);
01836   Description : uses MySQL to store an entry (code and value) in an attribute table 
01837                 (seq_region_attrib by default)
01838   Return type : array_ref
01839   Caller      : general
01840   Status      : stable
01841 
01842 =cut
01843 
01844 sub store_new_attribute {
01845   my $self         = shift;
01846   my $sr_id        = shift;
01847   my $attrib_code  = shift;
01848   my $attrib_value = shift || '';
01849   my $table        = shift || 'seq_region_attrib';
01850 
01851   #get database handle
01852   my $dbh = $self->get_dbconnection('loutre');
01853   #get attrib_type_id for this particular attribute
01854   my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
01855   #store
01856   my $r = $dbh->do(
01857     qq(insert into $table
01858            values (?,?,?)),
01859     {},
01860     ($sr_id,$attrib_id,$attrib_value)
01861   );
01862   return ['Stored',$r];
01863 }
01864 
01865 =head2 update_attribute
01866 
01867   Arg[1]      : seq_region.seq_region_id
01868   Arg[2]      : attrib_type.code
01869   Arg[3]      : attrib_type.value
01870   ARG[4]      : table to update (seq_region_attribute by default)
01871   Example     : $support->update_attribute(23,name,5);
01872   Description : uses MySQL to update an attribute table (seq_region_attrib by default)
01873   Return type : array_ref
01874   Caller      : general
01875   Status      : stable
01876 
01877 =cut
01878 
01879 sub update_attribute {
01880   my $self = shift;
01881   my $sr_id = shift;
01882   my $attrib_code  = shift;
01883   my $attrib_value = shift;
01884   my $table        = shift || 'seq_region_attrib';
01885   my $dbh = $self->get_dbconnection('loutre');
01886   my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
01887   #update
01888   my $r = $dbh->do(
01889     qq(update $table
01890            set value = ?
01891            where seq_region_id = $sr_id
01892            and attrib_type_id = $attrib_id),
01893     {},
01894     ($attrib_value)
01895   );
01896   return ['Updated',$r];
01897 }
01898 
01899 1;