Archive Ensembl HomeArchive Ensembl Home
FileAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBFile::FileAdaptor - Base Adaptor for direct file access
00024 
00025 =head1 SYNOPSIS
00026 
00027 
00028 
00029 =head1 DESCRIPTION
00030 
00031 Basic wrapper class to provide access to file based data.
00032 
00033 This is primarily aimed at indexed Collection (.col) files, which are optimised for Slice
00034 based queries. Collections store fixed-width/windowed data as BLOBs.  This makes
00035 it possible to seek to the required location given slice coordinates and read only
00036 the required amount of data covering the slice.
00037 
00038 Currently only works as hybrid DBAdaptor e.g. ResultFeatureAdaptor which inherits both from 
00039 here and BaseFeatureAdaptor.
00040 
00041 =head1 SEE ALSO
00042 
00043 
00044 =cut
00045 
00046 
00047 
00048 package Bio::EnsEMBL::DBFile::FileAdaptor;
00049 
00050 use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate);
00051 use strict;
00052 use warnings;
00053 
00054 
=head2 get_filehandle

  Arg[1]     : string     - filepath
  Arg[2]     : HASHREF    - Optional params, see open_file
  Example    : my $fh     = $self->get_filehandle($filepath, {-binmode => 1});
  Description: Gets and caches a simple file handle. On a cache miss the file
               is opened via open_file and, for read handles ('<'), passed to
               initialise_filehandle. The caller's params hash is never modified.
  Returntype : GLOB/undef - filehandle
  Exceptions : warns if cache entry exists but is not defined
               throws (via open_file) if the file cannot be opened
  Caller     : general
  Status     : at risk

=cut

sub get_filehandle{
  my ($self, $filepath, $params_hash) = @_;

  # Work on a shallow copy so the default operator is never written back
  # into the hash owned by the caller.
  my %params = %{ $params_hash || {} };

  # Resolve the operator exactly as open_file does, so an undef/empty
  # -file_operator is treated as a read and still gets initialised below.
  my $file_op = $params{-file_operator} || '<';
  $params{-file_operator} = $file_op;

  if(! exists $self->{file_cache}{$filepath}{filehandle}){
    # Fully qualified call forces use of this class' open_file, rather than
    # any same-named sub found via a parallel inheritance path.
    my $fh = $self->Bio::EnsEMBL::DBFile::FileAdaptor::open_file($filepath, \%params);

    if(defined $fh){
      $self->{file_cache}{$filepath}{filehandle} = $fh;
      # Only read handles are initialised; the base implementation is a no-op
      # which descendant adaptors may override.
      $self->initialise_filehandle($filepath) if($file_op eq '<');
    }
  }
  elsif(! defined $self->{file_cache}{$filepath}{filehandle}){
    #This maybe one of several read/seek errors which will have already been warned
    warn "Encountered and error with file handle for $filepath\n";
  }
  #else
  # check against cache file op
  # to make sure we aren't trying to open an already open fh with a different operator

  return $self->{file_cache}{$filepath}{filehandle};
}
00099 
00100 
=head2 open_file

  Arg[1]     : string     - filepath
  Arg[2]     : HASHREF    - Optional params:
                                 -binmode       => 0|1,   # Boolean i.e. treat file as binary
                                 -file_operator => '>'    # Default is '<', also supports '>>'
                                #-perms_octal   =>  # Requires FileHandle
  Example    : my $fh     = $self->open_file($filepath, {-binmode => 1, -file_operator => '>'});
  Description: Opens a file for reading, writing or appending.
  Returntype : GLOB - filehandle
  Exceptions : throws if filepath is not defined or empty
               throws if file operator unsupported
               throws if file open fails
               throws if failed to set binmode
  Caller     : general
  Status     : at risk

=cut

sub open_file{
  my ($self, $filepath, $params_hash) = @_;

  #Validate params_hash?
  #rearrange? Will not warn/throw for invalid keys?
  #perms octal, requires FileHandle? See EFGUtils::open_file

  if((! defined $filepath) || ($filepath eq '')){
    throw('Must provide a filepath argument');
  }

  $params_hash ||= {};
  my $file_op    = $params_hash->{-file_operator} || '<';

  if(($file_op ne '<') &&
     ($file_op ne '>') &&
     ($file_op ne '>>')){
    throw("Cannot perform open with unsupported operator:\t${file_op}${filepath}");
  }

  # Three-argument open keeps the mode separate from the path, so the path is
  # always taken literally: surrounding whitespace is preserved and characters
  # such as '|', '&' or '>' in a file name can never be interpreted as a mode.
  my $fh;

  if(! open($fh, $file_op, $filepath)){
    # Failures throw; callers that only want to probe for a file should
    # wrap this call in an eval.
    throw("Failed to open:\t$filepath\n$!\n");
  }

  if($params_hash->{-binmode} && (! binmode($fh))){
    throw("Failed to set binmode:\t$filepath\n$!");
  }

  return $fh;
}
00161 
00162 
=head2 validate_file_length

  Arg[1]     : string  - filepath
  Arg[2]     : int     - expected length in bytes
  Arg[3]     : boolean - Optional, open the file in binmode
  Example    : $self->validate_file_length($filepath, $expected_length);
  Description: Utility method which can be used during file creation to
               check that a file is exactly the expected number of bytes long.
  Returntype : None
  Exceptions : throws (via open_file) if file open fails
               throws if the end of the file cannot be seeked
               throws if file is not expected length
  Caller     : general
  Status     : at risk - change to seek to accounts for 'logical characters'

=cut

sub validate_file_length{
  my ($self, $filepath, $expected_length, $binmode) = @_;

  #Currently not using cache as we rarely want to
  #use the file handle afterwards

  #Force use of FileAdaptor::open_file, as a same-named plain sub
  #(EFGUtils::open_file) may be visible via a parallel inheritance path
  my $fh = $self->Bio::EnsEMBL::DBFile::FileAdaptor::open_file($filepath, {-binmode => $binmode});

  #sysseek always returns length in bytes, change to seek which
  #uses logical characters i.e. actual encoding?
  #Does seek use bytes in binmode and chars in non-binmode?

  my $seeked_bytes = sysseek($fh, 0, 2);# 2 is SEEK_END
  my $seek_error   = $!; # Capture before close can overwrite it
  close($fh);            # Handle is not cached, so release it here

  if(! defined $seeked_bytes){
    throw("Failed to seek to end of file:\t$filepath\n$seek_error\n");
  }

  #sysseek returns the string '0 but true' for position 0 (an empty file)
  #Numify so the messages below report a plain 0
  $seeked_bytes += 0;

  if($seeked_bytes < $expected_length){
    throw("File is shorter($seeked_bytes) than expected($expected_length):\t$filepath\n");
  }
  elsif($seeked_bytes > $expected_length){
    throw("File is longer($seeked_bytes) than expected($expected_length):\t$filepath\n");
  }

  return;
}
00208 
00209 
00210 
00211 
00212 
00213 ### STUB/TEMPLATE METHODS ###
00214 #
00215 #   If required, these should be overridden in the
00216 #   descendant FileAdaptor e.g. CollectionAdaptor.
00217 #   Listed here for visibility (rather than
00218 #   using 'can').
00219 
00220 
# Default no-op hook invoked by get_filehandle after a read handle has been
# opened and cached. Takes no action and returns nothing (undef in scalar
# context, the empty list in list context).
sub initialise_filehandle {
  return;
}
00224 
00225 
00226 
00227 1;