Archive Ensembl HomeArchive Ensembl Home
BaseFeatureAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor - An Abstract Base class for all
00024 FeatureAdaptors
00025 
00026 =head1 SYNOPSIS
00027 
00028 Abstract class - should not be instantiated.  Implementation of
00029 abstract methods must be performed by subclasses.
00030 
00031 =head1 DESCRIPTION
00032 
00033 This is a base adaptor for feature adaptors. This base class is simply a way
00034 of eliminating code duplication through the implementation of methods
00035 common to all feature adaptors.
00036 
00037 =head1 METHODS
00038 
00039 =cut
00040 
00041 package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
00042 use vars qw(@ISA @EXPORT);
00043 use strict;
00044 
00045 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
00046 use Bio::EnsEMBL::Utils::Cache;
00047 use Bio::EnsEMBL::Utils::Exception qw(warning throw deprecate stack_trace_dump);
00048 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00049 use Bio::EnsEMBL::Utils::Iterator;
00050 
00051 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
00052 
00053 @EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});
00054 
00055 our $SLICE_FEATURE_CACHE_SIZE    = 4;
00056 our $MAX_SPLIT_QUERY_SEQ_REGIONS = 3;
00057 our $SILENCE_CACHE_WARNINGS = 0;
00058 
00059 =head2 new
00060 
00061   Arg [1]    : list of args @args
00062                Superclass constructor arguments
00063   Example    : none
00064   Description: Constructor which warns if caching has been switched off
00065   Returntype : Bio::EnsEMBL::BaseFeatureAdaptor
00066   Exceptions : none
00067   Caller     : implementing subclass constructors
00068   Status     : Stable
00069 
00070 =cut
00071 
00072 sub new {
00073   my ($class, @args) = @_;
00074   my $self = $class->SUPER::new(@args);
00075   if ( defined $self->db->no_cache() && $self->db->no_cache() && ! $SILENCE_CACHE_WARNINGS) {
00076     warning(  "You are using the API without caching most recent features. "
00077             . "Performance might be affected." );
00078   }
00079   return $self;
00080 }
00081 
00082 =head2 start_equals_end
00083 
00084   Arg [1]    : (optional) boolean $newval
00085   Example    : $bfa->start_equals_end(1);
00086   Description: Getter/Setter for the start_equals_end flag.  If set
00087                to true sub _slice_fetch will use a simplified sql to retrieve 1bp slices.
00088   Returntype : boolean
00089   Exceptions : none
00090   Caller     : EnsemblGenomes variation DB build
00091   Status     : Stable
00092   
00093 =cut
00094 
00095 sub start_equals_end {
00096   my ( $self, $value ) = @_;
00097 
00098   if ( defined($value) ) {
00099     $self->{'start_equals_end'} = $value;
00100   }
00101   return $self->{'start_equals_end'};
00102 }
00103 
00104 
00105 =head2 clear_cache
00106 
00107   Args      : None
00108   Example   : my $sa =
00109                 $registry->get_adaptor( 'Mus musculus', 'Core',
00110                                         'Slice' );
00111               my $ga =
00112                 $registry->get_adaptor( 'Mus musculus', 'Core',
00113                                         'Gene' );
00114 
00115               my $slice =
00116                 $sa->fetch_by_region( 'Chromosome', '1', 1e8,
00117                                       1.05e8 );
00118 
00119               my $genes = $ga->fetch_all_by_Slice($slice);
00120 
00121               $ga->clear_cache();
00122 
00123   Description   : Empties the feature cache associated with this
00124                   feature adaptor.
00125   Return type   : None
00126   Exceptions    : None
00127   Caller        : General
00128   Status        : At risk (under development)
00129 
00130 =cut
00131 
00132 sub clear_cache {
00133   my ($self) = @_;
00134   %{$self->{_slice_feature_cache}} = ();
00135   return;
00136 }
00137 
00138 =head2 _slice_feature_cache
00139  
00140   Description   : Returns the feature cache if we are allowed to cache and
00141                 will build it if we need to. We will never return a reference
00142                 to the hash to avoid unintentional auto-vivfying caching
00143   Returntype    : Bio::EnsEMBL::Utils::Cache
00144   Exceptions    : None
00145   Caller        : Internal
00146 
00147 =cut
00148 
00149 sub _slice_feature_cache {
00150   my ($self) = @_;
00151   return if $self->db()->no_cache();
00152   if(! exists $self->{_slice_feature_cache}) {
00153     tie my %cache, 'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE;
00154     $self->{_slice_feature_cache} = \%cache;
00155   }
00156   return $self->{_slice_feature_cache};
00157 }
00158 
00159 =head2 fetch_all_by_Slice
00160 
00161   Arg [1]    : Bio::EnsEMBL::Slice $slice
00162                the slice from which to obtain features
00163   Arg [2]    : (optional) string $logic_name
00164                the logic name of the type of features to obtain
00165   Example    : $fts = $a->fetch_all_by_Slice($slice, 'Swall');
00166   Description: Returns a listref of features created from the database 
00167                which are on the Slice defined by $slice. If $logic_name is 
00168                defined only features with an analysis of type $logic_name 
00169                will be returned. 
00170                NOTE: only features that are entirely on the slice's seq_region
00171                will be returned (i.e. if they hang off the start/end of a
00172                seq_region they will be discarded). Features can extend over the
00173                slice boundaries though (in cases where you have a slice that
00174                doesn't span the whole seq_region).
00175   Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
00176   Exceptions : none
00177   Caller     : Bio::EnsEMBL::Slice
00178   Status     : Stable
00179 
00180 =cut
00181 
00182 sub fetch_all_by_Slice {
00183   my ($self, $slice, $logic_name) = @_;
00184   #fetch by constraint with empty constraint
00185   return $self->fetch_all_by_Slice_constraint($slice, '', $logic_name);
00186 }
00187 
00188 
00189 
00190 =head2 fetch_Iterator_by_Slice_method
00191 
00192   Arg [1]    : CODE ref of Slice fetch method
00193   Arg [2]    : ARRAY ref of parameters for Slice fetch method
00194   Arg [3]    : Optional int: Slice index in parameters array
00195   Arg [4]    : Optional int: Slice chunk size. Default=500000
00196   Example    : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method
00197                                       ($feature_adaptor->can('fetch_all_by_Slice_Arrays'),
00198                                        \@fetch_method_params,
00199                                        0,#Slice idx
00200                                       );
00201 
00202                while(my $feature = $slice_iter->next && defined $feature){
00203                  #Do something here
00204                }
00205 
00206   Description: Creates an Iterator which chunks the query Slice to facilitate
00207                large Slice queries which would have previously run out of memory
00208   Returntype : Bio::EnsEMBL::Utils::Iterator
00209   Exceptions : Throws if mandatory params not valid
00210   Caller     : general
00211   Status     : at risk
00212 
00213 =cut
00214 
00215 #Does not support Collections. See Funcgen ResultFeatureAdaptor::fetch_collection_Iterator_by_Slice_method
00216 
00217 sub fetch_Iterator_by_Slice_method{
00218   my ($self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size) = @_;
00219 
00220   if(! ( defined $slice_method_ref &&
00221          ref($slice_method_ref) eq 'CODE')
00222     ){
00223     throw('Must pass a valid Slice fetch method CODE ref');
00224   }
00225 
00226   if (! ($params_ref && 
00227          ref($params_ref) eq 'ARRAY')) {
00228     #Don't need to check size here so long as we have valid Slice
00229     throw('You must pass a method params ARRAYREF');
00230   }
00231   
00232   $slice_idx    = 0 if(! defined $slice_idx);
00233   my $slice     = $params_ref->[$slice_idx];
00234   $chunk_size ||= 1000000;
00235         
00236   my @feat_cache;
00237   my $finished     = 0;
00238   my $start        = 1; #local coord for sub slice
00239   my $end          = $slice->length;
00240   my $num_overlaps = 0;
00241   
00242   my $coderef = 
00243     sub {
00244       
00245       while (scalar(@feat_cache) == 0 &&
00246              ! $finished) {
00247         
00248         my $new_end = ($start + $chunk_size - 1);
00249         
00250         if ($new_end >= $end) {
00251           # this is our last chunk
00252           $new_end = $end;
00253           $finished = 1;  
00254         }
00255         
00256         #Chunk by sub slicing
00257         my $sub_slice             = $slice->sub_Slice($start, $new_end);
00258         $params_ref->[$slice_idx] = $sub_slice;
00259         @feat_cache = @{ $slice_method_ref->($self, @$params_ref)};
00260         
00261         #Remove & count overlapping features
00262         splice(@feat_cache, 0, $num_overlaps) if($num_overlaps);
00263         my $i;
00264         
00265         if (scalar(@feat_cache) > 0) {
00266           
00267           my $feat_end  = $feat_cache[$#feat_cache]->seq_region_end;
00268           my $slice_end = $sub_slice->end;
00269           $num_overlaps = 0;
00270           
00271           for ($i = $#feat_cache; $i >=0; $i--) {
00272             
00273             if ($feat_end > $slice_end) {
00274               $feat_end  = $feat_cache[$i]->end;
00275               $num_overlaps ++;
00276             } else {
00277               last;
00278             }
00279             
00280           }
00281         }
00282         
00283         # update the start coordinate
00284         $start = $new_end + 1;
00285       }
00286       
00287       #this maybe returning from an undef cache
00288       #Need to sub this out even more?
00289       return shift @feat_cache;
00290     };
00291 
00292   return Bio::EnsEMBL::Utils::Iterator->new($coderef);
00293 }
00294 
00295 
00296 =head2 fetch_Iterator_by_Slice
00297 
00298   Arg [1]    : Bio::EnsEMBL::Slice
00299   Arg [2]    : Optional string: logic name of analysis
00300   Arg [3]    : Optional int: Chunk size to iterate over. Default is 500000
00301   Example    : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice($slice);
00302 
00303                while(my $feature = $slice_iter->next && defined $feature){
00304                  #Do something here
00305                }
00306 
00307   Description: Creates an Iterator which chunks the query Slice to facilitate
00308                large Slice queries which would have previously run out of memory
00309   Returntype : Bio::EnsEMBL::Utils::Iterator
00310   Exceptions : None
00311   Caller     : general
00312   Status     : at risk
00313 
00314 =cut
00315 
00316 sub fetch_Iterator_by_Slice{
00317   my ($self, $slice, $logic_name, $chunk_size) = @_;
00318 
00319   my $method_ref = $self->can('fetch_all_by_Slice');
00320 
00321   return $self->fetch_Iterator_by_Slice_method($method_ref, [$slice, $logic_name], 0, $chunk_size);
00322 }
00323 
00324 
00325 =head2 fetch_all_by_Slice_and_score
00326 
00327   Arg [1]    : Bio::EnsEMBL::Slice $slice
00328                the slice from which to obtain features
00329   Arg [2]    : (optional) float $score
00330                lower bound of the the score of the features retrieved
00331   Arg [3]    : (optional) string $logic_name
00332                the logic name of the type of features to obtain
00333   Example    : $fts = $a->fetch_all_by_Slice_and_score($slice,90,'Swall');
00334   Description: Returns a list of features created from the database which are 
00335                are on the Slice defined by $slice and which have a score 
00336                greater than $score. If $logic_name is defined, 
00337                only features with an analysis of type $logic_name will be 
00338                returned. 
00339   Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
00340   Exceptions : none
00341   Caller     : Bio::EnsEMBL::Slice
00342   Status     : Stable
00343 
00344 =cut
00345 
00346 sub fetch_all_by_Slice_and_score {
00347   my ( $self, $slice, $score, $logic_name ) = @_;
00348 
00349   my $constraint;
00350   if ( defined($score) ) {
00351     # Get the synonym of the primary_table
00352     my @tabs = $self->_tables();
00353     my $syn  = $tabs[0]->[1];
00354 
00355     $constraint = sprintf( "%s.score > %s",
00356                 $syn,
00357                 $self->dbc()->db_handle()->quote( $score, SQL_FLOAT ) );
00358   }
00359 
00360   return
00361     $self->fetch_all_by_Slice_constraint( $slice, $constraint,
00362                                           $logic_name );
00363 }
00364 
00365 
00366 =head2 fetch_all_by_Slice_constraint
00367 
00368   Arg [1]    : Bio::EnsEMBL::Slice $slice
00369                the slice from which to obtain features
00370   Arg [2]    : (optional) string $constraint
00371                An SQL query constraint (i.e. part of the WHERE clause)
00372   Arg [3]    : (optional) string $logic_name
00373                the logic name of the type of features to obtain
00374   Example    : $fs = $a->fetch_all_by_Slice_constraint($slc, 'perc_ident > 5');
00375   Description: Returns a listref of features created from the database which 
00376                are on the Slice defined by $slice and fulfill the SQL 
00377                constraint defined by $constraint. If logic name is defined, 
00378                only features with an analysis of type $logic_name will be 
00379                returned. 
00380   Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
00381   Exceptions : thrown if $slice is not defined
00382   Caller     : Bio::EnsEMBL::Slice
00383   Status     : Stable
00384 
00385 =cut
00386 
00387 sub fetch_all_by_Slice_constraint {
00388   my ( $self, $slice, $constraint, $logic_name ) = @_;
00389 
00390 
00391   my @result = ();
00392 
00393   if ( !ref($slice)
00394        || !(    $slice->isa('Bio::EnsEMBL::Slice')
00395              or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
00396   {
00397     throw("Bio::EnsEMBL::Slice argument expected.");
00398   }
00399 
00400   $constraint ||= '';
00401   $constraint =
00402     $self->_logic_name_to_constraint( $constraint, $logic_name );
00403 
00404   # If the logic name was invalid, undef was returned
00405   if ( !defined($constraint) ) { return [] }
00406 
00407   my $key;
00408   my $cache;
00409 
00410   # Will only use feature_cache if hasn't been no_cache attribute set
00411   if (
00412     !( defined( $self->db()->no_cache() ) && $self->db()->no_cache() ) )
00413   {
00414 
00415     #strain test and add to constraint if so to stop caching.
00416     if ( $slice->isa('Bio::EnsEMBL::StrainSlice') ) {
00417       my $string =
00418         $self->dbc()->db_handle()->quote( $slice->strain_name() );
00419 
00420       if ( $constraint ne "" ) {
00421         $constraint .= " AND $string = $string ";
00422       } else {
00423         $constraint .= " $string = $string ";
00424       }
00425     }
00426 
00427     # Check the cache and return the cached results if we have already
00428     # done this query.  The cache key is the made up from the slice
00429     # name, the constraint, and the bound parameters (if there are any).
00430     $key = uc( join( ':', $slice->name(), $constraint ) );
00431 
00432     my $bind_params = $self->bind_param_generic_fetch();
00433 
00434     if ( defined($bind_params) ) {
00435       $key .= ':'
00436         . join( ':', map { $_->[0] . '/' . $_->[1] } @{$bind_params} );
00437     }
00438 
00439     $cache = $self->_slice_feature_cache();
00440     if ( exists( $cache->{$key} ) ) {
00441       # Clear the bound parameters and return the cached data.
00442       $self->{'_bind_param_generic_fetch'} = ();
00443       return $cache->{$key};
00444     }
00445   } ## end if ( !( defined( $self...)))
00446 
00447   my $sa = $slice->adaptor();
00448 
00449   # Hap/PAR support: retrieve normalized 'non-symlinked' slices.
00450   my @proj = @{ $sa->fetch_normalized_slice_projection($slice) };
00451 
00452 
00453  
00454   if ( !@proj ) {
00455     throw( 'Could not retrieve normalized Slices. '
00456          . 'Database contains incorrect assembly_exception information.'
00457     );
00458   }
00459 
00460   # Want to get features on the FULL original slice as well as any
00461   # symlinked slices.
00462 
00463   # Filter out partial slices from projection that are on same
00464   # seq_region as original slice.
00465 
00466   my $sr_id = $slice->get_seq_region_id();
00467 
00468   @proj = grep { $_->to_Slice->get_seq_region_id() != $sr_id } @proj;
00469 
00470   my $segment = bless( [ 1, $slice->length(), $slice ],
00471                        'Bio::EnsEMBL::ProjectionSegment' );
00472   push( @proj, $segment );
00473 
00474   # construct list of Hap/PAR boundaries for entire seq region
00475   my @bounds;
00476 
00477   my $ent_slice = $sa->fetch_by_seq_region_id($sr_id);
00478   if ( $slice->strand() == -1 ) {
00479     $ent_slice = $ent_slice->invert();
00480   }
00481 
00482   my @ent_proj =
00483     @{ $sa->fetch_normalized_slice_projection($ent_slice) };
00484   shift(@ent_proj);    # skip first
00485 
00486   @bounds = map { $_->from_start() - $slice->start() + 1 } @ent_proj;
00487 
00488 
00489   # fetch features for the primary slice AND all symlinked slices
00490   foreach my $seg (@proj) {
00491 
00492 
00493     my $offset    = $seg->from_start();
00494     my $seg_slice = $seg->to_Slice();
00495     my $features =
00496       $self->_slice_fetch( $seg_slice, $constraint );
00497 
00498     # If this was a symlinked slice offset the feature coordinates as
00499     # needed.
00500     if ( $seg_slice->name() ne $slice->name() ) {
00501 
00502     FEATURE:
00503       foreach my $f ( @{$features} ) {
00504         if ( $offset != 1 ) {
00505           $f->{'start'} += $offset - 1;
00506           $f->{'end'}   += $offset - 1;
00507         }
00508 
00509         # discard boundary crossing features from symlinked regions
00510         foreach my $bound (@bounds) {
00511           if ( $f->{'start'} < $bound && $f->{'end'} >= $bound ) {
00512             next FEATURE;
00513           }
00514         }
00515 
00516         $f->{'slice'} = $slice;
00517         push( @result, $f );
00518       }
00519     } else {
00520       push( @result, @{$features} );
00521     }
00522   } ## end foreach my $seg (@proj)
00523 
00524   # Will only use feature_cache when set attribute no_cache in DBAdaptor
00525   if ( defined($key) ) {
00526     $cache->{$key} = \@result;
00527   }
00528 
00529   return \@result;
00530 } ## end sub fetch_all_by_Slice_constraint
00531 
00532 
00533 =head2 fetch_all_by_logic_name
00534 
00535   Arg [3]    : string $logic_name
00536                the logic name of the type of features to obtain
00537   Example    : $fs = $a->fetch_all_by_logic_name('foobar');
00538   Description: Returns a listref of features created from the database.
00539                only features with an analysis of type $logic_name will
00540                be returned.  If the logic name is invalid (not in the
00541                analysis table), a reference to an empty list will be
00542                returned.
00543   Returntype : listref of Bio::EnsEMBL::SeqFeatures
00544   Exceptions : thrown if no $logic_name
00545   Caller     : General
00546   Status     : Stable
00547 
00548 =cut
00549 
00550 sub fetch_all_by_logic_name {
00551   my ( $self, $logic_name ) = @_;
00552 
00553   if ( !defined($logic_name) ) {
00554     throw("Need a logic_name");
00555   }
00556 
00557   my $constraint = $self->_logic_name_to_constraint( '', $logic_name );
00558 
00559   if ( !defined($constraint) ) {
00560     warning("Invalid logic name: $logic_name");
00561     return [];
00562   }
00563 
00564   return $self->generic_fetch($constraint);
00565 }
00566 
00567 # Method that creates an object.  Called by the _objs_from_sth() method
00568 # in the sub-classes (the various feature adaptors).  Overridden by the
00569 # feature collection classes.
00570 
00571 sub _create_feature {
00572   my ( $self, $feature_type, $args ) = @_;
00573   return $feature_type->new( %{$args} );
00574 }
00575 
00576 # This is the same as the above, but calls the new_fast() constructor of
00577 # the feature type.
00578 
00579 sub _create_feature_fast {
00580   my ( $self, $feature_type, $args ) = @_;
00581   return $feature_type->new_fast($args);
00582 }
00583 
00584 #
00585 # helper function used by fetch_all_by_Slice_constraint method
00586 #
00587 sub _slice_fetch {
00588   my ( $self, $slice, $orig_constraint ) = @_;
00589 
00590   my $slice_start         = $slice->start();
00591   my $slice_end           = $slice->end();
00592   my $slice_strand        = $slice->strand();
00593   my $slice_cs            = $slice->coord_system();
00594   my $slice_seq_region    = $slice->seq_region_name();
00595   my $slice_seq_region_id = $slice->get_seq_region_id();
00596 
00597   #get the synonym and name of the primary_table
00598   my @tabs = $self->_tables;
00599   my ( $tab_name, $tab_syn ) = @{ $tabs[0] };
00600 
00601   #find out what coordinate systems the features are in
00602   my $mcc      = $self->db->get_MetaCoordContainer();
00603   my @feat_css = ();
00604 
00605   my $mca        = $self->db->get_MetaContainer();
00606   my $value_list = $mca->list_value_by_key( $tab_name . "build.level" );
00607   if ( @$value_list and $slice->is_toplevel() ) {
00608     push @feat_css, $slice_cs;
00609   } else {
00610     @feat_css =
00611       @{ $mcc->fetch_all_CoordSystems_by_feature_type($tab_name) };
00612   }
00613 
00614   my $asma = $self->db->get_AssemblyMapperAdaptor();
00615   my @features;
00616 
00617   # fetch the features from each coordinate system they are stored in
00618 COORD_SYSTEM: foreach my $feat_cs (@feat_css) {
00619     my $mapper;
00620     my @coords;
00621     my @ids;
00622 
00623     if ( $feat_cs->equals($slice_cs) ) {
00624       # no mapping is required if this is the same coord system
00625 
00626       my $max_len = $self->_max_feature_length()
00627         || $mcc->fetch_max_length_by_CoordSystem_feature_type( $feat_cs,
00628                                                             $tab_name );
00629 
00630       my $constraint = $orig_constraint;
00631 
00632       my $sr_id;
00633       if ( $slice->adaptor() ) {
00634         $sr_id = $slice->adaptor()->get_seq_region_id($slice);
00635       } else {
00636         $sr_id =
00637           $self->db()->get_SliceAdaptor()->get_seq_region_id($slice);
00638       }
00639 
00640       # If there is mapping information, use the external_seq_region_id
00641       # to get features.
00642 
00643       my @sr_ids = ($sr_id);
00644 
00645       while (1) {
00646         my $ext_sr_id = $self->get_seq_region_id_external($sr_id);
00647 
00648         if ( $ext_sr_id == $sr_id ) { last }
00649 
00650         push( @sr_ids, $ext_sr_id );
00651         $sr_id = $ext_sr_id;
00652       }
00653 
00654       $constraint .= " AND " if ($constraint);
00655 
00656       
00657       $constraint .= "${tab_syn}.seq_region_id IN ("
00658           . join( ',', @sr_ids ) . ") AND";
00659 
00660       #faster query for 1bp slices where SNP data is not compressed
00661       if ( $self->start_equals_end && $slice_start == $slice_end ) {
00662       $constraint .=
00663           " AND ${tab_syn}.seq_region_start = $slice_end" .
00664           " AND ${tab_syn}.seq_region_end = $slice_start";
00665 
00666       } else {
00667 
00668       if ( !$slice->is_circular() ) {
00669           # Deal with the default case of a non-circular chromosome.
00670           $constraint .= " ${tab_syn}.seq_region_start <= $slice_end AND "
00671           . "${tab_syn}.seq_region_end >= $slice_start";
00672 
00673           if ( $max_len ) {
00674           my $min_start = $slice_start - $max_len;
00675           $constraint .= " AND ${tab_syn}.seq_region_start >= $min_start";
00676           }
00677 
00678       } else {
00679           # Deal with the case of a circular chromosome.
00680           if ( $slice_start > $slice_end ) {
00681           $constraint .= " ( ${tab_syn}.seq_region_start >= $slice_start "
00682               . "OR ${tab_syn}.seq_region_start <= $slice_end "
00683               . "OR ${tab_syn}.seq_region_end >= $slice_start "
00684               . "OR ${tab_syn}.seq_region_end <= $slice_end "
00685               . "OR ${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end)";
00686 
00687           } else {
00688           $constraint .= " ((${tab_syn}.seq_region_start <= $slice_end "
00689               . "AND ${tab_syn}.seq_region_end >= $slice_start) "
00690               . "OR (${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end "
00691               . "AND (${tab_syn}.seq_region_start <= $slice_end "
00692               . "OR ${tab_syn}.seq_region_end >= $slice_start)))";
00693           }
00694       }
00695 
00696       }
00697 
00698       my $fs = $self->generic_fetch( $constraint, undef, $slice );
00699 
00700     # features may still have to have coordinates made relative to slice
00701     # start
00702       $fs = $self->_remap( $fs, $mapper, $slice );
00703 
00704       push @features, @$fs;
00705     } else {
00706       $mapper = $asma->fetch_by_CoordSystems( $slice_cs, $feat_cs );
00707 
00708       next unless defined $mapper;
00709 
00710       # Get list of coordinates and corresponding internal ids for
00711       # regions the slice spans
00712       @coords =
00713         $mapper->map( $slice_seq_region, $slice_start, $slice_end,
00714                       $slice_strand, $slice_cs );
00715 
00716       @coords = grep { !$_->isa('Bio::EnsEMBL::Mapper::Gap') } @coords;
00717 
00718       next COORD_SYSTEM if ( !@coords );
00719 
00720       @ids = map { $_->id() } @coords;
00721       #coords are now id rather than name
00722       #      @ids = @{$asma->seq_regions_to_ids($feat_cs, \@ids)};
00723 
00724       # When regions are large and only partially spanned
00725       # by slice it is faster to to limit the query with
00726       # start and end constraints.  Take simple approach:
00727       # use regional constraints if there are less than a
00728       # specific number of regions covered.
00729 
00730       if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS ) {
00731         my $constraint = $orig_constraint;
00732         my $id_str = join( ',', @ids );
00733         $constraint .= " AND " if ($constraint);
00734         $constraint .= "${tab_syn}.seq_region_id IN ($id_str)";
00735         my $fs = $self->generic_fetch( $constraint, $mapper, $slice );
00736 
00737         $fs = $self->_remap( $fs, $mapper, $slice );
00738 
00739         push @features, @$fs;
00740 
00741       } else {
00742         # do multiple split queries using start / end constraints
00743 
00744         my $max_len = (
00745                 $self->_max_feature_length()
00746                   || $mcc->fetch_max_length_by_CoordSystem_feature_type(
00747                                                      $feat_cs, $tab_name
00748                   ) );
00749 
00750         my $len = @coords;
00751         for ( my $i = 0; $i < $len; $i++ ) {
00752           my $constraint = $orig_constraint;
00753           $constraint .= " AND " if ($constraint);
00754           $constraint .=
00755               "${tab_syn}.seq_region_id = "
00756             . $ids[$i] . " AND "
00757             . "${tab_syn}.seq_region_start <= "
00758             . $coords[$i]->end() . " AND "
00759             . "${tab_syn}.seq_region_end >= "
00760             . $coords[$i]->start();
00761 
00762           if ($max_len) {
00763             my $min_start = $coords[$i]->start() - $max_len;
00764             $constraint .=
00765               " AND ${tab_syn}.seq_region_start >= $min_start";
00766           }
00767           my $fs = $self->generic_fetch( $constraint, $mapper, $slice );
00768 
00769           $fs = $self->_remap( $fs, $mapper, $slice );
00770 
00771           push @features, @$fs;
00772         }
00773       } ## end else [ if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS)]
00774     } ## end else [ if ( $feat_cs->equals(...))]
00775   } ## end foreach my $feat_cs (@feat_css)
00776 
00777   return \@features;
00778 } ## end sub _slice_fetch
00779 
00780 
00781 #for a given seq_region_id, gets the one used in an external database, if present, otherwise, returns the internal one
00782 sub get_seq_region_id_external {
00783   my ( $self, $sr_id ) = @_;
00784   my $cs_a = $self->db()->get_CoordSystemAdaptor();
00785   return ( exists( $cs_a->{'_internal_seq_region_mapping'}->{$sr_id} )
00786            ? $cs_a->{'_internal_seq_region_mapping'}->{$sr_id}
00787            : $sr_id );
00788 }
00789 
00790 #for a given seq_region_id and coord_system, gets the one used in the internal (core) database
00791 sub get_seq_region_id_internal{
00792   my ( $self, $sr_id ) = @_;
00793   my $cs_a = $self->db()->get_CoordSystemAdaptor();
00794   return (  exists $cs_a->{'_external_seq_region_mapping'}->{$sr_id} 
00795             ? $cs_a->{'_external_seq_region_mapping'}->{$sr_id} 
00796             : $sr_id);
00797 }
00798 
00799 #
00800 # Helper function containing some common feature storing functionality
00801 #
00802 # Given a Feature this will return a copy (or the same feature if no changes 
00803 # to the feature are needed) of the feature which is relative to the start
00804 # of the seq_region it is on. The seq_region_id of the seq_region it is on
00805 # is also returned.
00806 #
00807 # This method will also ensure that the database knows which coordinate
00808 # systems that this feature is stored in.
00809 #
00810 
00811 sub _pre_store {
00812   my $self    = shift;
00813   my $feature = shift;
00814 
00815   if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
00816     throw('Expected Feature argument.');
00817   }
00818   my $slice = $feature->slice();
00819 
00820   $self->_check_start_end_strand($feature->start(),$feature->end(),
00821                                  $feature->strand(), $slice);
00822 
00823 
00824   my $db = $self->db();
00825 
00826   my $slice_adaptor = $db->get_SliceAdaptor();
00827 
00828   if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))  ) {
00829     throw('Feature must be attached to Slice to be stored.');
00830   }
00831 
00832   # make sure feature coords are relative to start of entire seq_region
00833 
00834   if($slice->start != 1 || $slice->strand != 1) {
00835     #move feature onto a slice of the entire seq_region
00836     $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
00837                                              $slice->seq_region_name(),
00838                                              undef, #start
00839                                              undef, #end
00840                                              undef, #strand
00841                                              $slice->coord_system->version());
00842 
00843     $feature = $feature->transfer($slice);
00844 
00845     if(!$feature) {
00846       throw('Could not transfer Feature to slice of ' .
00847             'entire seq_region prior to storing');
00848     }
00849   }
00850 
00851   # Ensure this type of feature is known to be stored in this coord system.
00852   my $cs = $slice->coord_system;
00853 
00854   my ($tab) = $self->_tables();
00855   my $tabname = $tab->[0];
00856 
00857   my $mcc = $db->get_MetaCoordContainer();
00858 
00859   $mcc->add_feature_type($cs, $tabname, $feature->length);
00860 
00861   my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
00862 
00863   if(!$seq_region_id) {
00864     throw('Feature is associated with seq_region which is not in this DB.');
00865   }
00866 
00867   return ($feature, $seq_region_id);
00868 }
00869 
00870 
00871 # The same function as _pre_store
00872 # This one is used to store user uploaded features in XXX_userdata db
00873 
00874 sub _pre_store_userdata {
00875   my $self    = shift;
00876   my $feature = shift;
00877 
00878   if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
00879     throw('Expected Feature argument.');
00880   }
00881 
00882   my $slice = $feature->slice();
00883   my $slice_adaptor = $slice->adaptor;
00884   
00885   $self->_check_start_end_strand($feature->start(),$feature->end(),
00886                                  $feature->strand(), $slice);
00887 
00888 
00889   if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
00890     throw('Feature must be attached to Slice to be stored.');
00891   }
00892 
00893   # make sure feature coords are relative to start of entire seq_region
00894 
00895   if($slice->start != 1 || $slice->strand != 1) {
00896     #move feature onto a slice of the entire seq_region
00897     $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
00898                                              $slice->seq_region_name(),
00899                                              undef, #start
00900                                              undef, #end
00901                                              undef, #strand
00902                                              $slice->coord_system->version());
00903 
00904     $feature = $feature->transfer($slice);
00905 
00906     if(!$feature) {
00907       throw('Could not transfer Feature to slice of ' .
00908             'entire seq_region prior to storing');
00909     }
00910   }
00911 
00912   # Ensure this type of feature is known to be stored in this coord system.
00913   my $cs = $slice->coord_system;
00914 
00915   my ($tab) = $self->_tables();
00916   my $tabname = $tab->[0];
00917 
00918   my $db = $self->db;
00919   my $mcc = $db->get_MetaCoordContainer();
00920 
00921   $mcc->add_feature_type($cs, $tabname, $feature->length);
00922 
00923   my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
00924 
00925   if(!$seq_region_id) {
00926     throw('Feature is associated with seq_region which is not in this DB.');
00927   }
00928 
00929   return ($feature, $seq_region_id);
00930 }
00931 
00932 
00933 #
00934 # helper function used to validate start/end/strand and 
00935 # hstart/hend/hstrand etc.
00936 #
00937 sub _check_start_end_strand {
00938   my $self = shift;
00939   my $start = shift;
00940   my $end   = shift;
00941   my $strand = shift;
00942   my $slice = shift;
00943 
00944   #
00945   # Make sure that the start, end, strand are valid
00946   #
00947   if(int($start) != $start) {
00948     throw("Invalid Feature start [$start].  Must be integer.");
00949   }
00950   if(int($end) != $end) {
00951     throw("Invalid Feature end [$end]. Must be integer.");
00952   }
00953   if(int($strand) != $strand || $strand < -1 || $strand > 1) {
00954     throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
00955   }
00956   if($end < $start && !$slice->is_circular()) {
00957     throw("Invalid Feature start/end [$start/$end]. Start must be less " .
00958           "than or equal to end.");
00959   }
00960 
00961   return 1;
00962 }
00963 
00964 
00965 #
00966 # Given a list of features checks if they are in the correct coord system
00967 # by looking at the first features slice.  If they are not then they are
00968 # converted and placed on the slice.
00969 #
00970 sub _remap {
00971   my ( $self, $features, $mapper, $slice ) = @_;
00972 
00973   #check if any remapping is actually needed
00974   if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature') ||
00975                     $features->[0]->slice == $slice)) {
00976     return $features;
00977   }
00978 
00979   #remapping has not been done, we have to do our own conversion from
00980   #to slice coords
00981 
00982   my @out;
00983 
00984   my $slice_start = $slice->start();
00985   my $slice_end   = $slice->end();
00986   my $slice_strand = $slice->strand();
00987   my $slice_cs    = $slice->coord_system();
00988 
00989   my ($seq_region, $start, $end, $strand);
00990 
00991   my $slice_seq_region_id = $slice->get_seq_region_id();
00992   my $slice_seq_region = $slice->seq_region_name();
00993 
00994   foreach my $f (@$features) {
00995     #since feats were obtained in contig coords, attached seq is a contig
00996     my $fslice = $f->slice();
00997     if(!$fslice) {
00998       throw("Feature does not have attached slice.\n");
00999     }
01000     my $fseq_region = $fslice->seq_region_name();
01001     my $fseq_region_id = $fslice->get_seq_region_id();
01002     my $fcs = $fslice->coord_system();
01003 
01004     if(!$slice_cs->equals($fcs)) {
01005       #slice of feature in different coord system, mapping required
01006 
01007       ($seq_region, $start, $end, $strand) =
01008         $mapper->fastmap($fseq_region_id,$f->start(),$f->end(),$f->strand(),$fcs);
01009 
01010       # undefined start means gap
01011       next if(!defined $start);
01012     } else {
01013       $start      = $f->start();
01014       $end        = $f->end();
01015       $strand     = $f->strand();
01016       $seq_region = $f->slice->seq_region_name();
01017     }
01018     
01019     # maps to region outside desired area
01020     next if ($start > $slice_end) || ($end < $slice_start) || 
01021       ($slice_seq_region ne $seq_region);
01022 
01023     #shift the feature start, end and strand in one call
01024     if($slice_strand == -1) {
01025       $f->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
01026     } else {
01027       $f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
01028     }
01029 
01030     $f->slice($slice);
01031 
01032     push @out,$f;
01033   }
01034 
01035   return \@out;
01036 }
01037 
01038 
01039 #
01040 # Given a logic name and an existing constraint this will
01041 # add an analysis table constraint to the feature.  Note that if no
01042 # analysis_id exists in the columns of the primary table then no
01043 # constraint is added at all
01044 #
01045 sub _logic_name_to_constraint {
01046   my $self = shift;
01047   my $constraint = shift;
01048   my $logic_name = shift;
01049 
01050   return $constraint if(!$logic_name);
01051 
01052   #make sure that an analysis_id exists in the primary table
01053   my ($prim_tab) = $self->_tables();
01054   my $prim_synonym = $prim_tab->[1];
01055 
01056   my $found_analysis=0;
01057   foreach my $col ($self->_columns) {
01058     my ($syn,$col_name) = split(/\./,$col);
01059     next if($syn ne $prim_synonym);
01060     if($col_name eq 'analysis_id') {
01061       $found_analysis = 1;
01062       last;
01063     }
01064   }
01065 
01066   if(!$found_analysis) {
01067     warning("This feature is not associated with an analysis.\n" .
01068             "Ignoring logic_name argument = [$logic_name].\n");
01069     return $constraint;
01070   }
01071 
01072   my $aa = $self->db->get_AnalysisAdaptor();
01073   my $an = $aa->fetch_by_logic_name($logic_name);
01074 
01075   if ( !defined($an) ) {
01076     return undef;
01077   }
01078 
01079   my $an_id = $an->dbID();
01080 
01081   $constraint .= ' AND' if($constraint);
01082   $constraint .= " ${prim_synonym}.analysis_id = $an_id";
01083   return $constraint;
01084 }
01085 
01086 
01087 =head2 store
01088 
01089   Arg [1]    : list of Bio::EnsEMBL::SeqFeature
01090   Example    : $adaptor->store(@feats);
01091   Description: ABSTRACT  Subclasses are responsible for implementing this 
01092                method.  It should take a list of features and store them in 
01093                the database.
01094   Returntype : none
01095   Exceptions : thrown method is not implemented by subclass
01096   Caller     : general
01097   Status     : At Risk
01098              : throws if called.
01099 
01100 =cut
01101 
01102 sub store{
01103   my $self = @_;
01104 
01105   throw("Abstract method store not defined by implementing subclass\n");
01106 }
01107 
01108 
01109 =head2 remove
01110 
01111   Arg [1]    : A feature $feature 
01112   Example    : $feature_adaptor->remove($feature);
01113   Description: This removes a feature from the database.  The table the
01114                feature is removed from is defined by the abstract method
01115                _tablename, and the primary key of the table is assumed
01116                to be _tablename() . '_id'.  The feature argument must 
01117                be an object implementing the dbID method, and for the
01118                feature to be removed from the database a dbID value must
01119                be returned.
01120   Returntype : none
01121   Exceptions : thrown if $feature arg does not implement dbID(), or if
01122                $feature->dbID is not a true value
01123   Caller     : general
01124   Status     : Stable
01125 
01126 =cut
01127 
01128 
01129 sub remove {
01130   my ($self, $feature) = @_;
01131 
01132   if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
01133     throw('Feature argument is required');
01134   }
01135 
01136   if(!$feature->is_stored($self->db)) {
01137     throw("This feature is not stored in this database");
01138   }
01139 
01140   my @tabs = $self->_tables;
01141   my ($table) = @{$tabs[0]};
01142 
01143   my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
01144   $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
01145   $sth->execute();
01146 
01147   #unset the feature dbID ad adaptor
01148   $feature->dbID(undef);
01149   $feature->adaptor(undef);
01150 
01151   return;
01152 }
01153 
01154 
01155 =head2 remove_by_Slice
01156 
01157   Arg [1]    : Bio::Ensembl::Slice $slice
01158   Example    : $feature_adaptor->remove_by_Slice($slice);
01159   Description: This removes features from the database which lie on a region
01160                represented by the passed in slice.  Only features which are
01161                fully contained by the slice are deleted; features which overlap
01162                the edge of the slice are not removed.
01163                The table the features are removed from is defined by
01164                the abstract method_tablename.
01165   Returntype : none
01166   Exceptions : thrown if no slice is supplied
01167   Caller     : general
01168   Status     : Stable
01169 
01170 =cut
01171 
01172 sub remove_by_Slice {
01173   my ($self, $slice) = @_;
01174 
01175   if(!$slice || !ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
01176     throw("Slice argument is required");
01177   }
01178 
01179   my @tabs = $self->_tables;
01180   my ($table_name) = @{$tabs[0]};
01181 
01182   my $seq_region_id = $self->db->get_SliceAdaptor->get_seq_region_id($slice);
01183   my $start = $slice->start();
01184   my $end   = $slice->end();
01185 
01186   #
01187   # Delete only features fully on the slice, not overlapping ones
01188   #
01189   my $sth = $self->prepare("DELETE FROM $table_name " .
01190                            "WHERE seq_region_id = ? " .
01191                            "AND   seq_region_start >= ? " .
01192                            "AND   seq_region_end <= ?");
01193 
01194   $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
01195   $sth->bind_param(2,$start,SQL_INTEGER);
01196   $sth->bind_param(3,$end,SQL_INTEGER);
01197   $sth->execute();
01198   $sth->finish();
01199 }
01200 
01201 
01202 #
01203 # Internal function. Allows the max feature length which is normally
01204 # retrieved from the meta_coord table to be overridden.  This allows
01205 # for some significant optimizations to be put in when it is known
01206 # that requested features will not be over a certain size.
01207 #
01208 sub _max_feature_length {
01209   my $self = shift;
01210   return $self->{'_max_feature_length'} = shift if(@_);
01211   return $self->{'_max_feature_length'};
01212 }
01213 
01214 
01215 #
01216 # Lists all seq_region_ids that a particular feature type is found on.
01217 # Useful e.g. for finding out which seq_regions have genes.
01218 # Returns a listref of seq_region_ids.
01219 #
01220 sub _list_seq_region_ids {
01221   my ($self, $table) = @_;
01222   
01223   my @out;
01224   
01225   my $sql = qq(
01226   SELECT DISTINCT
01227             sr.seq_region_id
01228   FROM      seq_region sr,
01229             $table a,
01230             coord_system cs
01231   WHERE     sr.seq_region_id = a.seq_region_id
01232     AND     sr.coord_system_id = cs.coord_system_id
01233     AND     cs.species_id = ?);
01234 
01235   my $sth = $self->prepare($sql);
01236 
01237   $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
01238 
01239   $sth->execute();
01240 
01241   while (my ($id) = $sth->fetchrow) {
01242     push(@out, $id);
01243   }
01244 
01245   $sth->finish;
01246 
01247   return \@out;
01248 }
01249 
01250 
01251 =head1 DEPRECATED METHODS
01252 
01253 =cut
01254 
01255 
01256 =head2 fetch_all_by_RawContig_constraint
01257 
01258   Description: DEPRECATED use fetch_all_by_RawContig_constraint instead
01259 
01260 =cut
01261 
01262 sub fetch_all_by_RawContig_constraint {
01263   my $self = shift;
01264   deprecate('Use fetch_all_by_Slice_constraint() instead.');
01265   return $self->fetch_all_by_slice_constraint(@_);
01266 }
01267 
01268 =head2 fetch_all_by_RawContig
01269 
01270   Description: DEPRECATED use fetch_all_by_Slice instead
01271 
01272 =cut
01273 
01274 sub fetch_all_by_RawContig {
01275   my $self = shift;
01276   deprecate('Use fetch_all_by_Slice() instead.');
01277   return $self->fetch_all_by_Slice(@_);
01278 }
01279 
01280 =head2 fetch_all_by_RawContig_and_score
01281 
01282   Description: DEPRECATED use fetch_all_by_Slice_and_score instead
01283 
01284 =cut
01285 
01286 sub fetch_all_by_RawContig_and_score{
01287   my $self = shift;
01288   deprecate('Use fetch_all_by_Slice_and_score() instead.');
01289   return $self->fetch_all_by_Slice_and_score(@_);
01290 }
01291 
01292 =head2 remove_by_RawContig
01293 
01294   Description: DEPRECATED use remove_by_Slice instead
01295 
01296 =cut
01297 
01298 sub remove_by_RawContig {
01299   my $self = shift;
01300   deprecate("Use remove_by_Slice instead");
01301   return $self->remove_by_Slice(@_);
01302 }
01303 
01304 
01305 sub remove_by_analysis_id {
01306   my ($self, $analysis_id) = @_;
01307 
01308   $analysis_id or throw("Must call with analysis id");
01309 
01310   my @tabs = $self->_tables;
01311   my ($tablename) = @{$tabs[0]};
01312 
01313   my $sql = "DELETE FROM $tablename WHERE analysis_id = $analysis_id";
01314 #  warn "SQL : $sql";
01315       
01316   my $sth = $self->prepare($sql);
01317   $sth->execute();
01318   $sth->finish();
01319 }
01320 
01321 sub remove_by_feature_id {
01322   my ($self, $features_list) = @_;
01323 
01324   my @feats = @$features_list or throw("Must call store with features");
01325 
01326   my @tabs = $self->_tables;
01327   my ($tablename) = @{$tabs[0]};
01328 
01329   my $sql = sprintf "DELETE FROM $tablename WHERE ${tablename}_id IN (%s)", join ', ', @feats;
01330 #  warn "SQL : $sql";
01331       
01332   my $sth = $self->prepare($sql);
01333   $sth->execute();
01334   $sth->finish();
01335 }
01336 
01337 
01338 1;