Archive Ensembl HomeArchive Ensembl Home
SliceAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::SliceAdaptor - A database aware adaptor responsible for
00024 the creation of Slice objects.
00025 
00026 =head1 SYNOPSIS
00027 
00028   use Bio::EnsEMBL::Utils::Slice qw(split_Slices);
00029   use Bio::EnsEMBL::Registry;
00030 
00031   Bio::EnsEMBL::Registry->load_registry_from_db(
00032     -host => 'ensembldb.ensembl.org',
00033     -user => 'anonymous'
00034   );
00035 
00036   $slice_adaptor =
00037     Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
00038 
00039   # get a slice on the entire chromosome X
00040   $chr_slice = $slice_adaptor->fetch_by_region( 'chromosome', 'X' );
00041 
00042   # get a slice for each clone in the database
00043   foreach $cln_slice ( @{ $slice_adaptor->fetch_all('clone') } ) {
00044     # do something with clone
00045   }
00046 
00047   # get a slice which is part of NT_004321
00048   $spctg_slice =
00049     $slice_adaptor->fetch_by_region( 'supercontig', 'NT_004321',
00050     200_000, 600_000 );
00051 
00052   # get all non-redundant slices from the highest possible coordinate
00053   # systems
00054   $slices = $slice_adaptor->fetch_all('toplevel');
00055 
00056   # include non-reference regions
00057   $slices = $slice_adaptor->fetch_all( 'toplevel', undef, 1 );
00058 
00059   # include non-duplicate regions
00060   $slices = $slice_adaptor->fetch_all( 'toplevel', undef, 0, 1 );
00061 
00062   # split up a list of slices into smaller slices
00063   $overlap    = 1000;
00064   $max_length = 1e6;
00065   $slices     = split_Slices( $slices, $max_length, $overlap );
00066 
00067   # store a list of slice names in a file
00068   open( FILE, ">$filename" ) or die("Could not open file $filename");
00069   foreach my $slice (@$slices) {
00070     print FILE $slice->name(), "\n";
00071   }
00072   close FILE;
00073 
00074   # retreive a list of slices from a file
00075   open( FILE, $filename ) or die("Could not open file $filename");
00076   while ( $name = <FILE> ) {
00077     chomp($name);
00078     $slice = $slice_adaptor->fetch_by_name($name);
00079     # do something with slice
00080   }
00081 
00082 =head1 DESCRIPTION
00083 
00084 This module is responsible for fetching Slices representing genomic
00085 regions from a database.  A Details on how slices can be used are in the
00086 Bio::EnsEMBL::Slice module.
00087 
00088 =head1 METHODS
00089 
00090 =cut
00091 
00092 
00093 package Bio::EnsEMBL::DBSQL::SliceAdaptor;
00094 use vars qw(@ISA);
00095 use strict;
00096 
00097 
00098 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
00099 use Bio::EnsEMBL::Slice;
00100 use Bio::EnsEMBL::CircularSlice;
00101 use Bio::EnsEMBL::Mapper;
00102 use Bio::EnsEMBL::LRGSlice;
00103 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
00104 
00105 
00106 @ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
00107 
00108 sub new {
00109   my $caller = shift;
00110 
00111   my $class = ref($caller) || $caller;
00112 
00113   my $self = $class->SUPER::new(@_);
00114 
00115   # use a cache which is shared and also used by the assembly
00116   # mapper adaptor
00117 
00118   my $seq_region_cache = $self->db->get_SeqRegionCache();
00119 
00120   $self->{'sr_name_cache'} = $seq_region_cache->{'name_cache'};
00121   $self->{'sr_id_cache'}   = $seq_region_cache->{'id_cache'};
00122 
00123   $self->{'lrg_region_test'} = undef;
00124   my $meta_container = $self->db->get_MetaContainer();
00125   my @values = $meta_container->list_value_by_key("LRG");
00126   if(scalar(@values) and $values[0]->[0]){
00127     $self->{'lrg_region_test'} = $values[0]->[0];
00128   }
00129   return $self;
00130 }
00131 
00132 
00133 =head2 fetch_by_region
00134 
00135   Arg [1]    : string $coord_system_name (optional)
00136                The name of the coordinate system of the slice to be created
00137                This may be a name of an actual coordinate system or an alias
00138                to a coordinate system.  Valid aliases are 'seqlevel' or
00139                'toplevel'.
00140   Arg [2]    : string $seq_region_name
00141                The name of the sequence region that the slice will be
00142                created on.
00143   Arg [3]    : int $start (optional, default = 1)
00144                The start of the slice on the sequence region
00145   Arg [4]    : int $end (optional, default = seq_region length)
00146                The end of the slice on the sequence region
00147   Arg [5]    : int $strand (optional, default = 1)
00148                The orientation of the slice on the sequence region
00149   Arg [6]    : string $version (optional, default = default version)
00150                The version of the coordinate system to use (e.g. NCBI33)
00151   Arg [7]    : boolean $no_fuzz (optional, default = undef (false))
00152                If true (non-zero), do not use "fuzzy matching" (see below).
00153   Example    : $slice = $slice_adaptor->fetch_by_region('chromosome', 'X');
00154                $slice = $slice_adaptor->fetch_by_region('clone', 'AC008066.4');
00155   Description: Retrieves a slice on the requested region.  At a minimum the
00156                name the name of the seq_region to fetch must be provided.
00157 
00158                If no coordinate system name is provided than a slice on the
00159                highest ranked coordinate system with a matching
00160                seq_region_name will be returned.  If a version but no
00161                coordinate system name is provided, the same behaviour will
00162                apply, but only coordinate systems of the appropriate version
00163                are considered.  The same applies if the 'toplevel' coordinate
00164                system is specified, however in this case the version is
00165                ignored.  The coordinate system should always be specified if
00166                it is known, since this is unambiguous and faster.
00167 
00168                Some fuzzy matching is performed if no exact match for
00169                the provided name is found.  This allows clones to be
00170                fetched even when their version is not known.  For
00171                example fetch_by_region('clone', 'AC008066') will
00172                retrieve the sequence_region with name 'AC008066.4'.
00173 
00174                The fuzzy matching can be turned off by setting the
00175                $no_fuzz argument to a true value.
00176 
00177                If the requested seq_region is not found in the database undef
00178                is returned.
00179 
00180   Returntype : Bio::EnsEMBL::Slice or undef
00181   Exceptions : throw if no seq_region_name is provided
00182                throw if invalid coord_system_name is provided
00183                throw if start > end is provided
00184   Caller     : general
00185   Status     : Stable
00186 
00187 =cut
00188 
00189 
00190 #
00191 # ARNE: This subroutine needs simplification!! 
00192 #
00193 sub fetch_by_region {
00194   my ( $self, $coord_system_name, $seq_region_name, $start, $end,
00195        $strand, $version, $no_fuzz )
00196     = @_;
00197 
00198   if ( !defined($start) )  { $start  = 1 }
00199   if ( !defined($strand) ) { $strand = 1 }
00200 
00201   if ( !defined($seq_region_name) ) {
00202     throw('seq_region_name argument is required');
00203   }
00204 
00205   my $cs;
00206   my $csa = $self->db->get_CoordSystemAdaptor();
00207 
00208   if ( defined($coord_system_name) ) {
00209     $cs = $csa->fetch_by_name( $coord_system_name, $version );
00210 
00211     ## REMOVE THESE THREE LINES WHEN STICKLEBACK DB IS FIXED!
00212     ## Anne/ap5 (2007-10-09):
00213     # The problem was that the stickleback genebuild called the
00214     # chromosomes 'groups', which meant they weren't being picked out by
00215     # the karyotype drawing code.  Apparently they are usually called
00216     # 'groups' in the stickleback community, even though they really are
00217     # chromosomes!
00218 
00219     if ( !defined($cs) && $coord_system_name eq 'chromosome' ) {
00220       $cs = $csa->fetch_by_name( 'group', $version );
00221     }
00222 
00223     if ( !defined($cs) ) {
00224       throw( sprintf( "Unknown coordinate system:\n"
00225                         . "name='%s' version='%s'\n",
00226                       $coord_system_name, $version ) );
00227     }
00228 
00229     # fetching by toplevel is same as fetching w/o name or version
00230     if ( $cs->is_top_level() ) {
00231       $cs      = undef;
00232       $version = undef;
00233     }
00234 
00235   } ## end if ( defined($coord_system_name...))
00236 
00237   my $constraint;
00238   my $sql;
00239   my @bind_params;
00240   my $key;
00241 
00242   if ( defined($cs) ) {
00243     $sql = sprintf( "SELECT sr.name, sr.seq_region_id, sr.length, %d "
00244                       . "FROM seq_region sr ",
00245                     $cs->dbID() );
00246 
00247     $constraint = "AND sr.coord_system_id = ?";
00248     push( @bind_params, [ $cs->dbID(), SQL_INTEGER ] );
00249 
00250     $key = "$seq_region_name:" . $cs->dbID();
00251   } else {
00252     $sql =
00253       "SELECT sr.name, sr.seq_region_id, sr.length, cs.coord_system_id "
00254       . "FROM seq_region sr, coord_system cs ";
00255 
00256     $constraint = "AND sr.coord_system_id = cs.coord_system_id "
00257       . "AND cs.species_id = ? ";
00258     push( @bind_params, [ $self->species_id(), SQL_INTEGER ] );
00259 
00260     if ( defined($version) ) {
00261       $constraint .= "AND cs.version = ? ";
00262       push( @bind_params, [ $version, SQL_VARCHAR ] );
00263     }
00264 
00265     $constraint .= "ORDER BY cs.rank ASC";
00266   }
00267 
00268   # check the cache so we only go to the db if necessary
00269   my $length;
00270   my $arr;
00271 
00272   if ( defined($key) ) { $arr = $self->{'sr_name_cache'}->{$key} }
00273 
00274   if ( defined($arr) ) {
00275     $length = $arr->[3];
00276   } else {
00277     my $sth =
00278       $self->prepare( $sql . "WHERE sr.name = ? " . $constraint );
00279 
00280     unshift( @bind_params, [ $seq_region_name, SQL_VARCHAR ] );
00281 
00282     my $pos = 0;
00283     foreach my $param (@bind_params) {
00284       $sth->bind_param( ++$pos, $param->[0], $param->[1] );
00285     }
00286 
00287     $sth->execute();
00288 
00289     if ( $sth->rows() == 0 ) {
00290       $sth->finish();
00291 
00292 
00293       # try synonyms
00294       my $syn_sql_sth = $self->prepare("select s.name from seq_region s, seq_region_synonym ss where s.seq_region_id = ss.seq_region_id and ss.synonym = ?");
00295       $syn_sql_sth->bind_param(1, $seq_region_name, SQL_VARCHAR);
00296       $syn_sql_sth->execute();
00297       my $new_name;
00298       $syn_sql_sth->bind_columns( \$new_name);
00299       if($syn_sql_sth->fetch){
00300     $syn_sql_sth->finish;
00301     return $self->fetch_by_region($coord_system_name, $new_name, $start, $end, $strand, $version, $no_fuzz);
00302       }
00303       $syn_sql_sth->finish;
00304 
00305 
00306       if ($no_fuzz) { return undef }
00307 
00308       # Do fuzzy matching, assuming that we are just missing a version
00309       # on the end of the seq_region name.
00310 
00311       $sth =
00312         $self->prepare( $sql . " WHERE sr.name LIKE ? " . $constraint );
00313 
00314       $bind_params[0] =
00315         [ sprintf( '%s.%%', $seq_region_name ), SQL_VARCHAR ];
00316 
00317       $pos = 0;
00318       foreach my $param (@bind_params) {
00319         $sth->bind_param( ++$pos, $param->[0], $param->[1] );
00320       }
00321 
00322       $sth->execute();
00323 
00324       my $prefix_len = length($seq_region_name) + 1;
00325       my $high_ver   = undef;
00326       my $high_cs    = $cs;
00327 
00328       # Find the fuzzy-matched seq_region with the highest postfix
00329       # (which ought to be a version).
00330 
00331       my ( $tmp_name, $id, $tmp_length, $cs_id );
00332       $sth->bind_columns( \( $tmp_name, $id, $tmp_length, $cs_id ) );
00333 
00334       my $i = 0;
00335 
00336       while ( $sth->fetch ) {
00337         my $tmp_cs =
00338           ( defined($cs) ? $cs : $csa->fetch_by_dbID($cs_id) );
00339 
00340         # cache values for future reference
00341         my $arr = [ $id, $tmp_name, $cs_id, $tmp_length ];
00342         $self->{'sr_name_cache'}->{"$tmp_name:$cs_id"} = $arr;
00343         $self->{'sr_id_cache'}->{"$id"}                = $arr;
00344 
00345         my $tmp_ver = substr( $tmp_name, $prefix_len );
00346 
00347         # skip versions which are non-numeric and apparently not
00348         # versions
00349         if ( $tmp_ver !~ /^\d+$/ ) { next }
00350 
00351         # take version with highest num, if two versions match take one
00352         # with highest ranked coord system (lowest num)
00353         if ( !defined($high_ver)
00354           || $tmp_ver > $high_ver
00355           || ( $tmp_ver == $high_ver && $tmp_cs->rank < $high_cs->rank )
00356           )
00357         {
00358           $seq_region_name = $tmp_name;
00359           $length          = $tmp_length;
00360           $high_ver        = $tmp_ver;
00361           $high_cs         = $tmp_cs;
00362         }
00363 
00364         $i++;
00365       } ## end while ( $sth->fetch )
00366       $sth->finish();
00367 
00368       # warn if fuzzy matching found more than one result
00369       if ( $i > 1 ) {
00370         warning(
00371           sprintf(
00372             "Fuzzy matching of seq_region_name "
00373               . "returned more than one result.\n"
00374               . "You might want to check whether the returned seq_region\n"
00375               . "(%s:%s) is the one you intended to fetch.\n",
00376             $high_cs->name(), $seq_region_name ) );
00377       }
00378 
00379       $cs = $high_cs;
00380 
00381       # return if we did not find any appropriate match:
00382       if ( !defined($high_ver) ) { return undef }
00383 
00384     } else {
00385 
00386       my ( $id, $cs_id );
00387       ( $seq_region_name, $id, $length, $cs_id ) =
00388         $sth->fetchrow_array();
00389       $sth->finish();
00390 
00391       # cache to speed up for future queries
00392       my $arr = [ $id, $seq_region_name, $cs_id, $length ];
00393       $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"} = $arr;
00394       $self->{'sr_id_cache'}->{"$id"}                       = $arr;
00395       $cs = $csa->fetch_by_dbID($cs_id);
00396     }
00397   } ## end else [ if ( defined($arr) ) ]
00398 
00399   if ( !defined($end) ) { $end = $length }
00400 
00401   if ( $end + 1 < $start ) {
00402     my $new_sl =
00403       Bio::EnsEMBL::CircularSlice->new(
00404                                    -COORD_SYSTEM    => $cs,
00405                                    -SEQ_REGION_NAME => $seq_region_name,
00406                                    -SEQ_REGION_LENGTH => $length,
00407                                    -START             => $start,
00408                                    -END               => $end,
00409                                    -STRAND            => 1,
00410                                    -ADAPTOR           => $self );
00411 
00412     return $new_sl;
00413   }
00414 
00415   if ( defined( $self->{'lrg_region_test'} )
00416        and substr( $cs->name, 0, 3 ) eq $self->{'lrg_region_test'} )
00417   {
00418     return
00419       Bio::EnsEMBL::LRGSlice->new( -COORD_SYSTEM    => $cs,
00420                                    -SEQ_REGION_NAME => $seq_region_name,
00421                                    -SEQ_REGION_LENGTH => $length,
00422                                    -START             => $start,
00423                                    -END               => $end,
00424                                    -STRAND            => $strand,
00425                                    -ADAPTOR           => $self );
00426   } else {
00427     return
00428       Bio::EnsEMBL::Slice->new_fast( {
00429                                   'coord_system'    => $cs,
00430                                   'seq_region_name' => $seq_region_name,
00431                                   'seq_region_length' => $length,
00432                                   'start'             => $start,
00433                                   'end'               => $end,
00434                                   'strand'            => $strand,
00435                                   'adaptor'           => $self } );
00436   }
00437 } ## end sub fetch_by_region
00438 
00439 =head2 fetch_by_toplevel_location
00440 
00441   Arg [1]     : string $location
00442                 Ensembl formatted location. Can be a format like 
00443                 C<name:start-end>, C<name:start..end>, C<name:start> and
00444                 C<name>.
00445   Arg[2]      : boolean $no_warnings
00446                 Suppress warnings from this method
00447   Example     : my $slice = $sa->fetch_by_toplevel_location('X:1-10000')
00448   Description : Converts an Ensembl location/region into the sequence region
00449                 name, start and end and passes them onto C<fetch_by_region()>. 
00450                 The code assumes that the required slice is on the top level
00451                 coordinate system. The code assumes that location formatting
00452                 is not perfect and will perform basic cleanup before parsing.
00453   Returntype  : Bio::EnsEMBL::Slice
00454   Exceptions  : If $location is false otherwise see C<fetch_by_region()>
00455   Caller      : General
00456   Status      : Beta
00457 
00458 =cut
00459 
00460 sub fetch_by_toplevel_location {
00461   my ($self, $location, $no_warnings) = @_;
00462   throw 'You must specify a location' if ! $location;
00463   
00464   my $regex = qr/^(\w+) :? (\d+)? (?:-|[.]{2})? (\d+)?$/xms;
00465   $location =~ s/\s+|,|_//g; #cleanup any nomenclature like 1_000 or 1 000 or 1,000
00466   
00467   if(my ($seq_region_name, $start, $end) = $location =~ $regex) {
00468     if(defined $start && $start < 1) {
00469       warning "Start was less than 1 (${start}) which is not allowed. Resetting to 1"  if ! $no_warnings;
00470       $start = 1;
00471     }
00472     if(defined $end && $end < 1) {
00473       throw "Cannot request negative or 0 end indexes through this interface. Given $end but expected something greater than 0";
00474     }
00475     
00476     my $coord_system_name = 'toplevel';
00477     my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, undef, undef, 0);
00478     return unless $slice;
00479     
00480     my $srl = $slice->seq_region_length();
00481     my $name = $slice->seq_region_name();
00482     if(defined $start && $start > $srl) {
00483       throw "Cannot request a slice whose start ($start) is greater than $srl for $name.";
00484     }
00485     if(defined $end && $end > $srl) {
00486       warning "Requested end ($end) is greater than $srl for $name. Resetting to $srl" if ! $no_warnings;
00487       $slice->{end} = $srl;
00488     }
00489     
00490     return $slice;
00491   }
00492   return;
00493 }
00494 
00495 =head2 fetch_by_region_unique
00496 
00497   Arg [1]    : string $coord_system_name (optional)
00498                The name of the coordinate system of the slice to be created
00499                This may be a name of an actual coordinate system or an alias
00500                to a coordinate system.  Valid aliases are 'seqlevel' or
00501                'toplevel'.
00502   Arg [2]    : string $seq_region_name
00503                The name of the sequence region that the slice will be
00504                created on.
00505   Arg [3]    : int $start (optional, default = 1)
00506                The start of the slice on the sequence region
00507   Arg [4]    : int $end (optional, default = seq_region length)
00508                The end of the slice on the sequence region
00509   Arg [5]    : int $strand (optional, default = 1)
00510                The orientation of the slice on the sequence region
00511   Arg [6]    : string $version (optional, default = default version)
00512                The version of the coordinate system to use (e.g. NCBI33)
00513   Arg [7]    : boolean $no_fuzz (optional, default = undef (false))
00514                If true (non-zero), do not use "fuzzy matching" (see below).
00515   Example    : $slice = $slice_adaptor->fetch_by_region_unique('chromosome', 'HSCHR6_MHC_COX');
00516   Description: Retrieves a slice on the requested region but returns only the unique
00517                parts of the slice.  At a minimum the
00518                name the name of the seq_region to fetch must be provided.
00519 
00520                If no coordinate system name is provided than a slice on the
00521                highest ranked coordinate system with a matching
00522                seq_region_name will be returned.  If a version but no
00523                coordinate system name is provided, the same behaviour will
00524                apply, but only coordinate systems of the appropriate version
00525                are considered.  The same applies if the 'toplevel' coordinate
00526                system is specified, however in this case the version is
00527                ignored.  The coordinate system should always be specified if
00528                it is known, since this is unambiguous and faster.
00529 
00530                Some fuzzy matching is performed if no exact match for
00531                the provided name is found.  This allows clones to be
00532                fetched even when their version is not known.  For
00533                example fetch_by_region('clone', 'AC008066') will
00534                retrieve the sequence_region with name 'AC008066.4'.
00535 
00536                The fuzzy matching can be turned off by setting the
00537                $no_fuzz argument to a true value.
00538 
00539                If the requested seq_region is not found in the database undef
00540                is returned.
00541 
00542   Returntype : listref Bio::EnsEMBL::Slice
00543   Exceptions : throw if no seq_region_name is provided
00544                throw if invalid coord_system_name is provided
00545                throw if start > end is provided
00546   Caller     : general
00547   Status     : Stable
00548 
00549 =cut
00550 
00551 sub fetch_by_region_unique {
00552   my $self = shift;
00553 
00554   my @out   = ();
00555   my $slice = $self->fetch_by_region(@_);
00556 
00557 
00558   if ( !exists( $self->{'asm_exc_cache'} ) ) {
00559     $self->_build_exception_cache();
00560   }
00561 
00562   if ( exists(
00563           $self->{'asm_exc_cache'}->{ $self->get_seq_region_id($slice) }
00564        ) )
00565   {
00566     # Dereference symlinked assembly regions.  Take out any regions
00567     # which are symlinked because these are duplicates.
00568     my @projection =
00569       @{ $self->fetch_normalized_slice_projection($slice) };
00570 
00571     foreach my $segment (@projection) {
00572       if ( $segment->[2]->seq_region_name() eq $slice->seq_region_name()
00573         && $segment->[2]->coord_system->equals( $slice->coord_system ) )
00574       {
00575         push( @out, $segment->[2] );
00576       }
00577     }
00578   }
00579 
00580   return \@out;
00581 } ## end sub fetch_by_region_unique
00582 
00583 =head2 fetch_by_name
00584 
00585   Arg [1]    : string $name
00586   Example    : $name  = 'chromosome:NCBI34:X:1000000:2000000:1';
00587                $slice = $slice_adaptor->fetch_by_name($name);
00588                $slice2 = $slice_adaptor->fetch_by_name($slice3->name());
00589   Description: Fetches a slice using a slice name (i.e. the value returned by
00590                the Slice::name method).  This is useful if you wish to 
00591                store a unique identifier for a slice in a file or database or
00592                pass a slice over a network.
00593                Slice::name allows you to serialise/marshall a slice and this
00594                method allows you to deserialise/unmarshal it.
00595 
00596                Returns undef if no seq_region with the provided name exists in
00597                the database.
00598 
00599   Returntype : Bio::EnsEMBL::Slice or undef
00600   Exceptions : throw if incorrent arg provided
00601   Caller     : Pipeline
00602   Status     : Stable
00603 
00604 =cut
00605 
00606 sub fetch_by_name {
00607   my $self = shift;
00608   my $name = shift;
00609 
00610   if(!$name) {
00611     throw("name argument is required");
00612   }
00613 
00614   my @array = split(/:/,$name);
00615 
00616   if(scalar(@array) < 3 || scalar(@array) > 6) {
00617     throw("Malformed slice name [$name].  Format is " .
00618         "coord_system:version:name:start:end:strand");
00619   }
00620 
00621   # Rearrange arguments to suit fetch_by_region
00622 
00623   my @targetarray;
00624 
00625   $targetarray[0]=$array[0];
00626   $targetarray[5]=(($array[1]&&$array[1] ne "")?$array[1]:undef);
00627   $targetarray[1]=(($array[2]&&$array[2] ne "")?$array[2]:undef);
00628   $targetarray[2]=(($array[3]&&$array[3] ne "")?$array[3]:undef);
00629   $targetarray[3]=(($array[4]&&$array[4] ne "")?$array[4]:undef);
00630   $targetarray[4]=(($array[5]&&$array[5] ne "")?$array[5]:undef);
00631   return $self->fetch_by_region(@targetarray);
00632 }
00633 
00634 
00635 
00636 =head2 fetch_by_seq_region_id
00637 
00638   Arg [1]    : string $seq_region_id
00639                The internal identifier of the seq_region to create this slice
00640                on
00641   Arg [2]    : optional start
00642   Arg [3]    : optional end
00643   Arg [4]    : optional strand
00644   Example    : $slice = $slice_adaptor->fetch_by_seq_region_id(34413);
00645   Description: Creates a slice object of an entire seq_region using the
00646                seq_region internal identifier to resolve the seq_region.
00647                Returns undef if no such slice exists.
00648   Returntype : Bio::EnsEMBL::Slice or undef
00649   Exceptions : none
00650   Caller     : general
00651   Status     : Stable
00652 
00653 =cut
00654 
00655 sub fetch_by_seq_region_id {
00656   my ( $self, $seq_region_id, $start, $end, $strand ) = @_;
00657 
00658   my $arr = $self->{'sr_id_cache'}->{$seq_region_id};
00659   my ( $name, $length, $cs, $cs_id );
00660 
00661 
00662   if ( $arr && defined( $arr->[2] ) ) {
00663     ( $name, $cs_id, $length ) = ( $arr->[1], $arr->[2], $arr->[3] );
00664     $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
00665   } else {
00666     my $sth =
00667       $self->prepare(   "SELECT sr.name, sr.coord_system_id, sr.length "
00668                       . "FROM seq_region sr "
00669                       . "WHERE sr.seq_region_id = ? " );
00670 
00671     $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
00672     $sth->execute();
00673 
00674     if ( $sth->rows() == 0 ) { return undef }
00675 
00676     ( $name, $cs_id, $length ) = $sth->fetchrow_array();
00677     $sth->finish();
00678 
00679     $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
00680 
00681     #cache results to speed up repeated queries
00682     my $arr = [ $seq_region_id, $name, $cs_id, $length ];
00683 
00684     $self->{'sr_name_cache'}->{"$name:$cs_id"} = $arr;
00685     $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
00686   }
00687 
00688   return
00689     Bio::EnsEMBL::Slice->new_fast({ 
00690                           'coord_system'     => $cs,
00691                               'seq_region_name'  => $name,
00692                               'seq_region_length'=> $length,
00693                               'start'            => $start || 1,
00694                               'end'              => $end || $length,
00695                               'strand'           => $strand || 1,
00696                               'adaptor'           => $self} );
00697 } ## end sub fetch_by_seq_region_id
00698 
00699 
00700 
00701 =head2 get_seq_region_id
00702 
00703   Arg [1]    : Bio::EnsEMBL::Slice $slice
00704                The slice to fetch a seq_region_id for
00705   Example    : $srid = $slice_adaptor->get_seq_region_id($slice);
00706   Description: Retrieves the seq_region id (in this database) given a slice
00707                Seq region ids are not stored on the slices themselves
00708                because they are intended to be somewhat database independant
00709                and seq_region_ids vary accross databases.
00710   Returntype : int
00711   Exceptions : throw if the seq_region of the slice is not in the db
00712                throw if incorrect arg provided
00713   Caller     : BaseFeatureAdaptor
00714   Status     : Stable
00715 
00716 =cut
00717 
00718 sub get_seq_region_id {
00719   my $self = shift;
00720   my $slice = shift;
00721 
00722   if(!$slice || !ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
00723     throw('Slice argument is required');
00724   }
00725   
00726   my $seq_region_name = $slice->seq_region_name();
00727   my $key = $seq_region_name.":".$slice->coord_system->dbID();
00728   my $arr = $self->{'sr_name_cache'}->{"$key"};
00729 
00730   if( $arr ) {
00731     return $arr->[0];
00732   }
00733 
00734   my $cs_id = $slice->coord_system->dbID();
00735 
00736   my $sth = $self->prepare("SELECT seq_region_id, length " .
00737                            "FROM seq_region " .
00738                            "WHERE name = ? AND coord_system_id = ?");
00739 
00740   #force seq_region_name cast to string so mysql cannot treat as int
00741   $sth->bind_param(1,"$seq_region_name",SQL_VARCHAR);
00742   $sth->bind_param(2,$cs_id,SQL_INTEGER);
00743   $sth->execute();
00744 
00745   if($sth->rows() != 1) {
00746     throw("Non existant or ambigous seq_region:\n" .
00747           "  coord_system=[$cs_id],\n" .
00748           "  name=[$seq_region_name],\n");
00749 
00750   }
00751 
00752   my($seq_region_id, $length) = $sth->fetchrow_array();
00753   $sth->finish();
00754 
00755   #cache information for future requests
00756   $arr = [ $seq_region_id, $seq_region_name, $cs_id, $length ];
00757 
00758   $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"} = $arr;
00759   $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
00760 
00761   return $seq_region_id;
00762 }
00763 
00764 
00765 
00766 =head2 fetch_all
00767 
00768   Arg [1]    : string $coord_system_name
00769                The name of the coordinate system to retrieve slices of.
00770                This may be a name of an acutal coordinate system or an alias
00771                to a coordinate system.  Valid aliases are 'seqlevel' or
00772                'toplevel'.
00773   Arg [2]    : string $coord_system_version (optional)
00774                The version of the coordinate system to retrieve slices of
00775   Arg [3]    : bool $include_non_reference (optional)
00776                If this argument is not provided then only reference slices
00777                will be returned. If set, both reference and non refeference
00778                slices will be rerurned.
00779   Arg [4]    : int $include_duplicates (optional)
00780                If set duplicate regions will be returned.
00781                
00782                NOTE: if you do not use this option and you have a PAR
00783                (pseudo-autosomal region) at the beginning of your seq_region
00784                then your slice will not start at position 1, so coordinates
00785                retrieved from this slice might not be what you expected.
00786 
00787   Arg[5]     : bool $include_lrg (optional)  (default 0)
00788                If set lrg regions will be returned aswell.
00789 
00790 
00791   Example    : @chromos = @{$slice_adaptor->fetch_all('chromosome','NCBI33')};
00792                @contigs = @{$slice_adaptor->fetch_all('contig')};
00793 
00794                # get even non-reference regions
00795                @slices = @{$slice_adaptor->fetch_all('toplevel',undef,1)};
00796 
00797                # include duplicate regions (such as pseudo autosomal regions)
00798                @slices = @{$slice_adaptor->fetch_all('toplevel', undef,0,1)};
00799 
00800   Description: Retrieves slices of all seq_regions for a given coordinate
00801                system.  This is analagous to the methods fetch_all which were
00802                formerly on the ChromosomeAdaptor, RawContigAdaptor and
00803                CloneAdaptor classes.  Slices fetched span the entire
00804                seq_regions and are on the forward strand.
00805                If the coordinate system with the provided name and version
00806                does not exist an empty list is returned.
00807                If the coordinate system name provided is 'toplevel', all
00808                non-redundant toplevel slices are returned (note that any
00809                coord_system_version argument is ignored in that case).
00810 
00811                Retrieved slices can be broken into smaller slices using the
00812                Bio::EnsEMBL::Utils::Slice module.
00813 
00814   Returntype : listref of Bio::EnsEMBL::Slices
00815   Exceptions : none
00816   Caller     : general
00817   Status     : Stable
00818 
00819 =cut
00820 
00821 sub fetch_all {
00822   my $self = shift;
00823   my $cs_name = shift;
00824   my $cs_version = shift || '';
00825 
00826   my ($include_non_reference, $include_duplicates, $include_lrg) = @_;
00827 
00828   #
00829   # verify existance of requested coord system and get its id
00830   #
00831   my $csa       = $self->db->get_CoordSystemAdaptor();
00832   my $orig_cs   = $csa->fetch_by_name($cs_name, $cs_version);
00833 
00834   return [] if ( !$orig_cs );
00835 
00836   my %bad_vals=();
00837 
00838 
00839   #
00840   # Get a hash of non reference seq regions
00841   #
00842   if ( !$include_non_reference ) {
00843     my $sth =
00844       $self->prepare(   'SELECT sr.seq_region_id '
00845                       . 'FROM seq_region sr, seq_region_attrib sra, '
00846                       . 'attrib_type at, coord_system cs '
00847                       . 'WHERE at.code = "non_ref" '
00848                       . 'AND sra.seq_region_id = sr.seq_region_id '
00849                       . 'AND at.attrib_type_id = sra.attrib_type_id '
00850                       . 'AND sr.coord_system_id = cs.coord_system_id '
00851                       . 'AND cs.species_id = ?' );
00852 
00853     $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
00854     $sth->execute();
00855 
00856     my ($seq_region_id);
00857     $sth->bind_columns( \$seq_region_id );
00858 
00859     while ( $sth->fetch() ) {
00860       $bad_vals{$seq_region_id} = 1;
00861     }
00862   }
00863 
00864   #
00865   # if we do not want lrg's then add them to the bad list;
00866   #
00867   if ( !$include_lrg ) {
00868     my $sth =
00869       $self->prepare(   'SELECT sr.seq_region_id '
00870                       . 'FROM seq_region sr, seq_region_attrib sra, '
00871                       . 'attrib_type at, coord_system cs '
00872                       . 'WHERE at.code = "LRG" '
00873                       . 'AND sra.seq_region_id = sr.seq_region_id '
00874                       . 'AND at.attrib_type_id = sra.attrib_type_id '
00875                       . 'AND sr.coord_system_id = cs.coord_system_id '
00876                       . 'AND cs.species_id = ?' );
00877 
00878     $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
00879     $sth->execute();
00880 
00881     my ($seq_region_id);
00882     $sth->bind_columns( \$seq_region_id );
00883 
00884     while ( $sth->fetch() ) {
00885       $bad_vals{$seq_region_id} = 1;
00886     }
00887   }
00888 
00889   #
00890   # Retrieve the seq_regions from the database
00891   #
00892 
00893   my $sth;
00894   if ( $orig_cs->is_top_level() ) {
00895     $sth =
00896       $self->prepare(   'SELECT sr.seq_region_id, sr.name, '
00897                       . 'sr.length, sr.coord_system_id '
00898                       . 'FROM seq_region sr, seq_region_attrib sra, '
00899                       . 'attrib_type at, coord_system cs '
00900                       . 'WHERE at.code = "toplevel" '
00901                       . 'AND at.attrib_type_id = sra.attrib_type_id '
00902                       . 'AND sra.seq_region_id = sr.seq_region_id '
00903                       . 'AND sr.coord_system_id = cs.coord_system_id '
00904                       . 'AND cs.species_id = ?' );
00905 
00906     $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
00907     $sth->execute();
00908   } else {
00909     $sth =
00910       $self->prepare(   'SELECT sr.seq_region_id, sr.name, '
00911                       . 'sr.length, sr.coord_system_id '
00912                       . 'FROM seq_region sr '
00913                       . 'WHERE sr.coord_system_id = ?' );
00914 
00915     $sth->bind_param( 1, $orig_cs->dbID, SQL_INTEGER );
00916     $sth->execute();
00917   }
00918 
00919   my ( $seq_region_id, $name, $length, $cs_id );
00920   $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id ) );
00921 
00922   my $cache_count = 0;
00923 
00924   my @out;
00925   while($sth->fetch()) {
00926     if(!defined($bad_vals{$seq_region_id})){
00927       my $cs = $csa->fetch_by_dbID($cs_id);
00928 
00929       if(!$cs) {
00930         throw("seq_region $name references non-existent coord_system $cs_id.");
00931       }
00932 
00933       #cache values for future reference, but stop adding to the cache once we
00934       #we know we have filled it up
00935       if($cache_count < $Bio::EnsEMBL::Utils::SeqRegionCache::SEQ_REGION_CACHE_SIZE) {
00936         my $arr = [ $seq_region_id, $name, $cs_id, $length ];
00937 
00938         $self->{'sr_name_cache'}->{"$name:$cs_id"} = $arr;
00939         $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
00940 
00941         $cache_count++;
00942       }
00943 
00944       my $slice = Bio::EnsEMBL::Slice->new_fast({
00945       'start'           => 1,
00946           'end'             => $length,
00947           'strand'          => 1,
00948          'seq_region_name'  => $name,
00949          'seq_region_length'=> $length,
00950          'coord_system'     => $cs,
00951          'adaptor'          => $self});
00952 
00953       if(!defined($include_duplicates) or !$include_duplicates){
00954         # test if this slice *could* have a duplicate (exception) region
00955         $self->_build_exception_cache() if(!exists $self->{'asm_exc_cache'});
00956         if(exists $self->{asm_exc_cache}->{$seq_region_id}) {
00957 
00958           # Dereference symlinked assembly regions.  Take out
00959           # any regions which are symlinked because these are duplicates
00960           my @projection = @{$self->fetch_normalized_slice_projection($slice)};
00961           foreach my $segment ( @projection) {
00962             if($segment->[2]->seq_region_name() eq $slice->seq_region_name() &&
00963                $segment->[2]->coord_system->equals($slice->coord_system)) {
00964               push @out, $segment->[2];
00965             }
00966           }
00967         } else {
00968           # no duplicate regions
00969           push @out, $slice;
00970         }
00971       } else {
00972         # we want duplicates anyway so do not do any checks
00973         push @out, $slice;
00974       }
00975     }
00976   }
00977 
00978   return \@out;
00979 }
00980 
00981 =head2 is_toplevel
00982   Arg        : int seq_region_id 
00983   Example    : my $top = $slice_adptor->is_toplevel($seq_region_id)
00984   Description: Returns 1 if slice is a toplevel slice else 0
00985   Returntype : int
00986   Caller     : Slice method is_toplevel
00987   Status     : At Risk
00988 
00989 =cut
00990 
00991 sub is_toplevel {
00992   my $self = shift;
00993   my $id   = shift;
00994 
00995   my $sth = $self->prepare(
00996             "SELECT at.code from seq_region_attrib sra, attrib_type at "
00997               . "WHERE sra.seq_region_id = ? "
00998               . "AND at.attrib_type_id = sra.attrib_type_id "
00999               . "AND at.code = 'toplevel'" );
01000 
01001   $sth->bind_param( 1, $id, SQL_INTEGER );
01002   $sth->execute();
01003 
01004   my $code;
01005   $sth->bind_columns( \$code );
01006 
01007   while ( $sth->fetch ) {
01008     $sth->finish;
01009     return 1;
01010   }
01011 
01012   $sth->finish;
01013   return 0;
01014 }
01015 
01016 =head2 is_reference
01017   Arg        : int seq_region_id 
01018   Example    : my $reference = $slice_adptor->is_reference($seq_region_id)
01019   Description: Returns 1 if slice is a reference slice else 0
01020   Returntype : int
01021   Caller     : Slice method is_reference
01022   Status     : At Risk
01023 
01024 =cut
01025 
01026 sub is_reference {
01027   my $self = shift;
01028   my $id   = shift;
01029 
01030   my $sth = $self->prepare(
01031             "SELECT at.code from seq_region_attrib sra, attrib_type at "
01032               . "WHERE sra.seq_region_id = ? "
01033               . "AND at.attrib_type_id = sra.attrib_type_id "
01034               . "AND at.code = 'non_ref'" );
01035 
01036   $sth->bind_param( 1, $id, SQL_INTEGER );
01037   $sth->execute();
01038 
01039   my $code;
01040   $sth->bind_columns( \$code );
01041 
01042   while ( $sth->fetch ) {
01043     $sth->finish;
01044     return 0;
01045   }
01046 
01047   $sth->finish;
01048   return 1;
01049 }
01050 
01051 =head2 fetch_by_band
01052 
01053  Title   : fetch_by_band
01054  Usage   :
01055  Function: Does not work please use fetch_by_chr_band
01056  Example :
01057  Returns : Bio::EnsEMBL::Slice
01058  Args    : the band name
01059  Status     : AT RISK
01060 
01061 =cut
01062 
01063 sub fetch_by_band {
01064   my ($self,$band) = @_;
01065 
01066   my $sth = $self->dbc->prepare
01067         ("select s.name,max(k.seq_region_id)-min(k.seq_region_id, min(k.seq_region_start), max(k.seq_region_id) " .
01068          "from karyotype as k " .
01069          "where k.band like ? and k.seq_region_id = s.seq_region_id");
01070 
01071   $sth->bind_param(1,"$band%",SQL_VARCHAR);
01072   $sth->execute();
01073   my ( $seq_region_name, $discrepancy, $seq_region_start, $seq_region_end) = $sth->fetchrow_array;
01074 
01075   if($seq_region_name && $discrepancy>0) {
01076     throw("Band maps to multiple seq_regions");
01077   } else {
01078     return $self->fetch_by_region('toplevel',$seq_region_name,$seq_region_start,$seq_region_end);
01079   }
01080   throw("Band not recognised in database");
01081 }
01082 
01083 =head2 fetch_by_chr_band
01084 
01085  Title   : fetch_by_chr_band
01086  Usage   :
01087  Function: create a Slice representing a series of bands
01088  Example :
01089  Returns : Bio::EnsEMBL::Slice
01090  Args    : the band name
01091  Status     : Stable
01092 
01093 =cut
01094 
01095 sub fetch_by_chr_band {
01096   my ( $self, $chr, $band ) = @_;
01097 
01098   my $chr_slice = $self->fetch_by_region( 'toplevel', $chr );
01099   my $seq_region_id = $self->get_seq_region_id($chr_slice);
01100 
01101   my $sth =
01102     $self->prepare(   'SELECT MIN(k.seq_region_start), '
01103                     . 'MAX(k.seq_region_end) '
01104                     . 'FROM karyotype k '
01105                     . 'WHERE k.seq_region_id = ? '
01106                     . 'AND k.band LIKE ?' );
01107 
01108   $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
01109   $sth->bind_param( 2, "$band%",       SQL_VARCHAR );
01110   $sth->execute();
01111 
01112   my ( $slice_start, $slice_end ) = $sth->fetchrow_array;
01113 
01114   if ( defined $slice_start ) {
01115     return
01116       $self->fetch_by_region( 'toplevel',   $chr,
01117                               $slice_start, $slice_end );
01118   }
01119 
01120   throw("Band not recognised in database");
01121 } ## end sub fetch_by_chr_band
01122 
01123 
01124 
01125 =head2 fetch_by_exon_stable_id
01126 
01127   Arg [1]    : string $exonid
01128                The stable id of the exon around which the slice is 
01129                desired
01130   Arg [2]    : (optional) int $size
01131                The length of the flanking regions the slice should encompass 
01132                on either side of the exon (0 by default)
01133   Example    : $slc = $sa->fetch_by_exon_stable_id('ENSE00000302930',10);
01134   Description: Creates a slice around the region of the specified exon. 
01135                If a context size is given, the slice is extended by that 
01136                number of basepairs on either side of the
01137                transcript.
01138   Returntype : Bio::EnsEMBL::Slice
01139   Exceptions : Thrown if the exon is not in the database.
01140   Caller     : general
01141   Status     : Stable
01142 
01143 =cut
01144 
01145 sub fetch_by_exon_stable_id{
01146   my ($self,$exonid,$size) = @_;
01147 
01148   throw('Exon argument is required.') if(!$exonid);
01149 
01150   my $ea = $self->db->get_ExonAdaptor;
01151   my $exon = $ea->fetch_by_stable_id($exonid);
01152 
01153   throw("Exon [$exonid] does not exist in DB.") if(!$exon);
01154 
01155   return $self->fetch_by_Feature($exon, $size);
01156 }
01157 
01158 =head2 fetch_by_transcript_stable_id
01159 
01160   Arg [1]    : string $transcriptid
01161                The stable id of the transcript around which the slice is 
01162                desired
01163   Arg [2]    : (optional) int $size
01164                The length of the flanking regions the slice should encompass 
01165                on either side of the transcript (0 by default)
01166   Example    : $slc = $sa->fetch_by_transcript_stable_id('ENST00000302930',10);
01167   Description: Creates a slice around the region of the specified transcript. 
01168                If a context size is given, the slice is extended by that 
01169                number of basepairs on either side of the
01170                transcript.
01171   Returntype : Bio::EnsEMBL::Slice
01172   Exceptions : Thrown if the transcript is not in the database.
01173   Caller     : general
01174   Status     : Stable
01175 
01176 =cut
01177 
01178 sub fetch_by_transcript_stable_id{
01179   my ($self,$transcriptid,$size) = @_;
01180 
01181   throw('Transcript argument is required.') if(!$transcriptid);
01182 
01183   my $ta = $self->db->get_TranscriptAdaptor;
01184   my $transcript = $ta->fetch_by_stable_id($transcriptid);
01185 
01186   throw("Transcript [$transcriptid] does not exist in DB.") if(!$transcript);
01187 
01188   return $self->fetch_by_Feature($transcript, $size);
01189 }
01190 
01191 
01192 
01193 =head2 fetch_by_transcript_id
01194 
01195   Arg [1]    : int $transcriptid
01196                The unique database identifier of the transcript around which 
01197                the slice is desired
01198   Arg [2]    : (optional) int $size
01199                The length of the flanking regions the slice should encompass 
01200                on either side of the transcript (0 by default)
01201   Example    : $slc = $sa->fetch_by_transcript_id(24, 1000);
01202   Description: Creates a slice around the region of the specified transcript.
01203                If a context size is given, the slice is extended by that
01204                number of basepairs on either side of the
01205                transcript.
01206   Returntype : Bio::EnsEMBL::Slice
01207   Exceptions : throw on incorrect args
01208                throw if transcript is not in database
01209   Caller     : general
01210   Status     : Stable
01211 
01212 =cut
01213 
01214 sub fetch_by_transcript_id {
01215   my ($self,$transcriptid,$size) = @_;
01216 
01217   throw('Transcript id argument is required.') if(!$transcriptid);
01218 
01219   my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
01220   my $transcript = $transcript_adaptor->fetch_by_dbID($transcriptid);
01221 
01222   throw("Transcript [$transcriptid] does not exist in DB.") if(!$transcript);
01223 
01224   return $self->fetch_by_Feature($transcript, $size);
01225 }
01226 
01227 
01228 
01229 =head2 fetch_by_gene_stable_id
01230 
01231   Arg [1]    : string $geneid
01232                The stable id of the gene around which the slice is
01233                desired
01234   Arg [2]    : (optional) int $size
01235                The length of the flanking regions the slice should encompass
01236                on either side of the gene (0 by default)
01237   Example    : $slc = $sa->fetch_by_gene_stable_id('ENSG00000012123',10);
01238   Description: Creates a slice around the region of the specified gene.
01239                If a context size is given, the slice is extended by that
01240                number of basepairs on either side of the gene.
01241                The slice will be created in the genes native coordinate system.
01242   Returntype : Bio::EnsEMBL::Slice
01243   Exceptions : throw on incorrect args
01244                throw if transcript does not exist
01245   Caller     : general
01246   Status     : Stable
01247 
01248 =cut
01249 
01250 sub fetch_by_gene_stable_id {
01251   my ($self,$geneid,$size) = @_;
01252 
01253   throw('Gene argument is required.') if(!$geneid);
01254 
01255   my $gene_adaptor = $self->db->get_GeneAdaptor();
01256   my $gene = $gene_adaptor->fetch_by_stable_id($geneid);
01257 
01258   throw("Gene [$geneid] does not exist in DB.") if(!$gene);
01259 
01260   return $self->fetch_by_Feature($gene, $size);
01261 }
01262 
01263 
01264 
01265 =head2 fetch_by_Feature
01266 
01267   Arg [1]    : Bio::EnsEMBL::Feature $feat
01268                The feature to fetch the slice around
01269   Arg [2]    : int size (optional)
01270                The desired number of flanking basepairs around the feature.
01271                The size may also be provided as a percentage of the feature 
01272                size such as 200% or 80.5%.
01273   Example    : $slice = $slice_adaptor->fetch_by_Feature($feat, 100);
01274   Description: Retrieves a slice around a specific feature.  All this really
01275                does is return a resized version of the slice that the feature
01276                is already on. Note that slices returned from this method
01277                are always on the forward strand of the seq_region regardless of
01278                the strandedness of the feature passed in.
01279   Returntype : Bio::EnsEMBL::Slice
01280   Exceptions : throw if the feature does not have an attached slice
01281                throw if feature argument is not provided
01282   Caller     : fetch_by_gene_stable_id, fetch_by_transcript_stable_id,
01283                fetch_by_gene_id, fetch_by_transcript_id
01284   Status     : Stable
01285 
01286 =cut
01287 
01288 sub fetch_by_Feature{
01289   my ($self, $feature, $size) = @_;
01290 
01291   $size ||= 0;
01292 
01293   if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
01294     throw('Feature argument expected.');
01295   }
01296 
01297   my $slice = $feature->slice();
01298   if(!$slice || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice') )) {
01299     throw('Feature must be attached to a valid slice.');
01300   }
01301 
01302 
01303   my $fstart = $feature->start();
01304   my $fend   = $feature->end();
01305   if(!defined($fstart) || !defined($fend)) {
01306     throw('Feature must have defined start and end.');
01307   }
01308 
01309   #convert the feature slice coordinates to seq_region coordinates
01310   my $slice_start  = $slice->start();
01311   my $slice_end    = $slice->end();
01312   my $slice_strand = $slice->strand();
01313   if($slice_start != 1 || $slice_strand != 1) {
01314     if($slice_strand == 1) {
01315       $fstart = $fstart + $slice_start - 1;
01316       $fend   = $fend   + $slice_start - 1;
01317     } else {
01318       my $tmp_start = $fstart;
01319       $fstart = $slice_end - $fend      + 1;
01320       $fend   = $slice_end - $tmp_start + 1;
01321     }
01322   }
01323 
01324   ## Size may be stored as a %age of the length of the feature
01325   ## Size = 100% gives no context
01326   ## Size = 200% gives context - 50% the size of the feature either side of 
01327   ## feature
01328 
01329   $size = int( ($1-100)/200 * ($fend-$fstart+1) ) if( $size =~/([\d+\.]+)%/ );
01330 
01331   #return a new slice covering the region of the feature
01332   my $S = Bio::EnsEMBL::Slice->new_fast({
01333     'seq_region_name'   => $slice->seq_region_name,
01334     'seq_region_length' => $slice->seq_region_length,
01335     'coord_system'      => $slice->coord_system,
01336     'start'             => $fstart - $size,
01337     'end'               => $fend + $size,
01338     'strand'            => 1,
01339      'adaptor'          => $self});
01340   $S->{'_raw_feature_strand'}  = $feature->strand * $slice_strand if $feature->can('strand');
01341   return $S;
01342 }
01343 
01344 
01345 
01346 =head2 fetch_by_misc_feature_attribute
01347 
01348   Arg [1]    : string $attribute_type
01349                The code of the attribute type
01350   Arg [2]    : (optional) string $attribute_value
01351                The value of the attribute to fetch by
01352   Arg [3]    : (optional) int $size
01353                The amount of flanking region around the misc feature desired.
01354   Example    : $slice = $sa->fetch_by_misc_feature_attribute('superctg',
01355                                                              'NT_030871');
01356                $slice = $sa->fetch_by_misc_feature_attribute('synonym',
01357                                                              'AL00012311',
01358                                                              $flanking);
01359   Description: Fetches a slice around a MiscFeature with a particular
01360                attribute type and value. If no value is specified then
01361                the feature with the particular attribute is used.
01362                If no size is specified then 0 is used.
01363   Returntype : Bio::EnsEMBL::Slice
01364   Exceptions : Throw if no feature with the specified attribute type and value
01365                exists in the database
01366                Warning if multiple features with the specified attribute type
01367                and value exist in the database.
01368   Caller     : webcode
01369   Status     : Stable
01370 
01371 =cut
01372 
01373 sub fetch_by_misc_feature_attribute {
01374   my ($self, $attrib_type_code, $attrib_value, $size) = @_;
01375 
01376   my $mfa = $self->db()->get_MiscFeatureAdaptor();
01377 
01378   my $feats = $mfa->fetch_all_by_attribute_type_value($attrib_type_code,
01379                                                    $attrib_value);
01380 
01381   if(@$feats == 0) {
01382     throw("MiscFeature with $attrib_type_code=$attrib_value does " .
01383           "not exist in DB.");
01384   }
01385 
01386   if(@$feats > 1) {
01387     warning("MiscFeature with $attrib_type_code=$attrib_value is " .
01388             "ambiguous - using first one found.");
01389   }
01390 
01391   my ($feat) = @$feats;
01392 
01393   return $self->fetch_by_Feature($feat, $size);
01394 }
01395 
01396 =head2 fetch_normalized_slice_projection
01397 
01398   Arg [1]    : Bio::EnsEMBL::Slice $slice
01399   Example    :  ( optional )
01400   Description: gives back a project style result. The returned slices 
01401                represent the areas to which there are symlinks for the 
01402                given slice. start, end show which area on given slice is 
01403                symlinked
01404   Returntype : [[start,end,$slice][]]
01405   Exceptions : none
01406   Caller     : BaseFeatureAdaptor
01407   Status     : Stable
01408 
01409 =cut
01410 
01411 
01412 sub fetch_normalized_slice_projection {
01413   my $self = shift;
01414   my $slice = shift;
01415 
01416   my $slice_seq_region_id = $self->get_seq_region_id( $slice );
01417 
01418   $self->_build_exception_cache() if(!exists($self->{'asm_exc_cache'}));
01419 
01420   my $result = $self->{'asm_exc_cache'}->{$slice_seq_region_id};
01421 
01422   $result ||= [];
01423 
01424   my (@haps, @pars);
01425 
01426   foreach my $row (@$result) {
01427     my ( $seq_region_id, $seq_region_start, $seq_region_end,
01428          $exc_type, $exc_seq_region_id, $exc_seq_region_start,
01429          $exc_seq_region_end ) = @$row;
01430 
01431     # need overlapping PAR and all HAPs if any
01432     if( $exc_type eq "PAR" ) {
01433       if( $seq_region_start <= $slice->end() && 
01434           $seq_region_end >= $slice->start() ) {
01435         push( @pars, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
01436                        $exc_seq_region_start, $exc_seq_region_end ] );
01437       }
01438     } else {
01439       push( @haps, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
01440                      $exc_seq_region_start, $exc_seq_region_end ] );
01441     }
01442   }
01443 
01444   if(!@pars && !@haps) {
01445     #just return this slice, there were no haps or pars
01446     return  [bless ( [1,$slice->length, $slice], "Bio::EnsEMBL::ProjectionSegment")];
01447   }
01448 
01449   my @syms;
01450   if( @haps >= 1 ) {
01451     my @sort_haps = sort { $a->[1] <=> $b->[1] } @haps;
01452 
01453     my $count =0;
01454     my $chr_start = 1;
01455     my $hap_start = 1;
01456     my $last = 0;
01457 
01458     my $seq_reg_slice = $self->fetch_by_seq_region_id($slice_seq_region_id);
01459     my $exc_slice = $self->fetch_by_seq_region_id( $sort_haps[0][2] );
01460     my $len1 = $seq_reg_slice->length();
01461     my $len2 = $exc_slice->length();
01462     my $max_len = ($len1 > $len2) ? $len1 : $len2;
01463 
01464     while($count <= scalar(@sort_haps)  and !$last){
01465       my $chr_end;
01466       my $hap_end;
01467       if(defined($sort_haps[$count]) and defined($sort_haps[$count][0]) ){
01468     $hap_end = $sort_haps[$count][0]-1;
01469     $chr_end = $sort_haps[$count][3]-1
01470       }
01471       else{
01472     $last = 1;
01473     $hap_end = $len1;
01474     $chr_end = $len2;
01475     my $diff = ($hap_end-$hap_start)-($chr_end-$chr_start);
01476     if($diff > 0){
01477       push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end+$diff] );  
01478     }
01479     elsif($diff < 0){
01480       push( @syms, [ $hap_start, $hap_end - $diff, $sort_haps[0][2], $chr_start, $chr_end] );  
01481     }
01482     else{
01483       push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );  
01484     }   
01485     next;
01486       }
01487       if($hap_end and $hap_start < $len1){ # if hap at start or end of chromosome
01488     push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );  
01489       }
01490       $chr_start = $chr_end + ($sort_haps[$count][4]-$sort_haps[$count][3]) + 2;
01491       $hap_start = $hap_end + ($sort_haps[$count][1]-$sort_haps[$count][0]) + 2;
01492       $count++;
01493     }
01494 
01495 
01496   }
01497 
01498 
01499   # for now haps and pars should not be both there, but in theory we 
01500   # could handle it here by cleverly merging the pars into the existing syms,
01501   # for now just:
01502   push( @syms, @pars );
01503 
01504   my $mapper = Bio::EnsEMBL::Mapper->new( "sym", "org" );
01505   my $count = 0;
01506   for my $sym ( @syms ) {
01507     $mapper->add_map_coordinates( $slice_seq_region_id, $sym->[0], $sym->[1],
01508                                   1, $sym->[2], $sym->[3], $sym->[4] );
01509   }
01510 
01511   my @linked = $mapper->map_coordinates( $slice_seq_region_id,
01512                                          $slice->start(), $slice->end(),
01513                                          $slice->strand(), "sym" );
01514 
01515   # gaps are regions where there is no mapping to another region
01516   my $rel_start = 1;
01517 
01518   #if there was only one coord and it is a gap, we know it is just the
01519   #same slice with no overlapping symlinks
01520   if(@linked == 1 && $linked[0]->isa('Bio::EnsEMBL::Mapper::Gap')) {
01521     return [bless( [1,$slice->length, $slice], "Bio::EnsEMBL::ProjectionSegment" )];
01522   }
01523 
01524   my @out;
01525   for my $coord ( @linked ) {
01526     if( $coord->isa( "Bio::EnsEMBL::Mapper::Gap" )) {
01527     my $exc_slice = Bio::EnsEMBL::Slice->new_fast({
01528         'start'             => $coord->start(),
01529         'end'               => $coord->end(),
01530         'strand'            => $slice->strand(),
01531         'coord_system'      => $slice->coord_system(),
01532          'adaptor'          => $self,
01533          'seq_region_name'  => $slice->seq_region_name(),
01534          'seq_region_length' => $slice->seq_region_length()});
01535       push( @out, bless ( [ $rel_start, $coord->length()+$rel_start-1,
01536                         $exc_slice ], "Bio::EnsEMBL::ProjectionSegment") );
01537     } else {
01538       my $exc_slice = $self->fetch_by_seq_region_id( $coord->id() );
01539       my $exc2_slice = Bio::EnsEMBL::Slice->new_fast({
01540         
01541          'start'             => $coord->start(),
01542          'end'               => $coord->end(),
01543          'strand'            => $coord->strand(),
01544          'seq_region_name'   => $exc_slice->seq_region_name(),
01545          'seq_region_length' => $exc_slice->seq_region_length(),
01546          'coord_system'      => $exc_slice->coord_system(),
01547          'adaptor'           => $self
01548         });
01549     
01550       push( @out, bless( [ $rel_start, $coord->length() + $rel_start - 1,
01551                     $exc2_slice ], "Bio::EnsEMBL::ProjectionSegment") );
01552     }
01553     $rel_start += $coord->length();
01554   }
01555 
01556   return \@out;
01557 }
01558 
01559 
01560 
01561 
01562 =head2 store
01563 
01564   Arg [1]    : Bio::EnsEMBL::Slice $slice
01565   Arg [2]    : (optional) $seqref reference to a string
01566                The sequence associated with the slice to be stored.
01567   Example    : $slice = Bio::EnsEMBL::Slice->new(...);
01568                $seq_region_id = $slice_adaptor->store($slice, \$sequence);
01569   Description: This stores a slice as a sequence region in the database
01570                and returns the seq region id. The passed in slice must
01571                start at 1, and must have a valid seq_region name and coordinate
01572                system. The attached coordinate system must already be stored in
01573                the database.  The sequence region is assumed to start at 1 and
01574                to have a length equalling the length of the slice.  The end of
01575                the slice must equal the seq_region_length.
01576                If the slice coordinate system is the sequence level coordinate
01577                system then the seqref argument must also be passed.  If the
01578                slice coordinate system is NOT a sequence level coordinate
01579                system then the sequence argument cannot be passed.
01580   Returntype : int 
01581   Exceptions : throw if slice has no coord system.
01582                throw if slice coord system is not already stored.
01583                throw if slice coord system is seqlevel and no sequence is 
01584                      provided.
01585                throw if slice coord system is not seqlevel and sequence is
01586                      provided.
01587                throw if slice does not start at 1
01588                throw if sequence is provided and the sequence length does not
01589                      match the slice length.
01590                throw if the SQL insert fails (e.g. on duplicate seq region)
01591                throw if slice argument is not passed
01592                throw if the slice end is not equal to seq_region_length
01593   Caller     : database loading scripts
01594   Status     : Stable
01595 
01596 =cut
01597 
01598 
01599 
01600 sub store {
01601   my $self = shift;
01602   my $slice = shift;
01603   my $seqref = shift;
01604 
01605   #
01606   # Get all of the sanity checks out of the way before storing anything
01607   #
01608 
01609   if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
01610     throw('Slice argument is required');
01611   }
01612 
01613   my $cs = $slice->coord_system();
01614   throw("Slice must have attached CoordSystem.") if(!$cs);
01615 
01616   my $db = $self->db();
01617   if(!$cs->is_stored($db)) {
01618     throw("Slice CoordSystem must already be stored in DB.") 
01619   }
01620 
01621   if($slice->start != 1 || $slice->strand != 1) {
01622     throw("Slice must have start==1 and strand==1.");
01623   }
01624 
01625   if($slice->end() != $slice->seq_region_length()) {
01626     throw("Slice must have end==seq_region_length");
01627   }
01628 
01629   my $sr_len = $slice->length();
01630   my $sr_name  = $slice->seq_region_name();
01631 
01632   if(!$sr_name) {
01633     throw("Slice must have valid seq region name.");
01634   }
01635 
01636   if($cs->is_sequence_level()) {
01637     if(!$seqref) {
01638       throw("Must provide sequence for sequence level coord system.");
01639     }
01640     if(ref($seqref) ne 'SCALAR') {
01641       throw("Sequence must be a scalar reference.");
01642     }
01643     my $seq_len = length($$seqref);
01644 
01645     if($seq_len != $sr_len) {
01646       throw("Sequence length ($seq_len) must match slice length ($sr_len).");
01647     }
01648   } else {
01649     if($seqref) {
01650       throw("Cannot provide sequence for non-sequence level seq regions.");
01651     }
01652   }
01653 
01654   #store the seq_region
01655 
01656   my $sth = $db->dbc->prepare("INSERT INTO seq_region " .
01657                          "SET    name = ?, " .
01658                          "       length = ?, " .
01659                          "       coord_system_id = ?" );
01660 
01661   $sth->bind_param(1,$sr_name,SQL_VARCHAR);
01662   $sth->bind_param(2,$sr_len,SQL_INTEGER);
01663   $sth->bind_param(3,$cs->dbID,SQL_INTEGER);
01664 
01665   $sth->execute();
01666 
01667   my $seq_region_id = $sth->{'mysql_insertid'};
01668 
01669   if(!$seq_region_id) {
01670     throw("Database seq_region insertion failed.");
01671   }
01672 
01673   if($cs->is_sequence_level()) {
01674     #store sequence if it was provided
01675     my $seq_adaptor = $db->get_SequenceAdaptor();
01676     $seq_adaptor->store($seq_region_id, $$seqref);
01677   }
01678 
01679   #synonyms
01680   if(defined($slice->{'synonym'})){
01681     foreach my $syn (@{$slice->{'synonym'}} ){
01682       $syn->seq_region_id($seq_region_id); # set the seq_region_id
01683       $syn->adaptor->store($syn);
01684     }
01685   }
01686   
01687   
01688   $slice->adaptor($self);
01689 
01690   return $seq_region_id;
01691 }
01692 
01693 
01694 =head2 store_assembly
01695 
01696   Arg [1]    : Bio::EnsEMBL::Slice $asm_slice
01697   Arg [2]    : Bio::EnsEMBL::Slice $cmp_slice
01698   Example    : $asm = $slice_adaptor->store_assembly( $slice1, $slice2 );
01699   Description: Creates an entry in the analysis table based on the 
01700                coordinates of the two slices supplied. Returns a string 
01701                representation of the assembly that gets created.
01702   Returntype : string
01703   Exceptions : throw if either slice has no coord system (cs).
01704                throw unless the cs rank of the asm_slice is lower than the 
01705                cmp_slice.
01706                throw if there is no mapping path between coord systems
01707                throw if the lengths of each slice are not equal
01708                throw if there are existing mappings between either slice
01709                and the oposite cs
01710   Caller     : database loading scripts
01711   Status     : Experimental
01712 
01713 =cut
01714 
01715 sub store_assembly{
01716   my $self = shift;
01717   my $asm_slice = shift;
01718   my $cmp_slice = shift;
01719 
01720   #
01721   # Get all of the sanity checks out of the way before storing anything
01722   #
01723 
01724   if(!ref($asm_slice) || !($asm_slice->isa('Bio::EnsEMBL::Slice') or $asm_slice->isa('Bio::EnsEMBL::LRGSlice'))) {
01725     throw('Assembled Slice argument is required');
01726   }
01727   if(!ref($cmp_slice) || !($cmp_slice->isa('Bio::EnsEMBL::Slice') or $cmp_slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
01728     throw('Assembled Slice argument is required');
01729   }
01730 
01731   my $asm_cs = $asm_slice->coord_system();
01732   throw("Slice must have attached CoordSystem.") if(!$asm_cs);
01733   my $cmp_cs = $cmp_slice->coord_system();
01734   throw("Slice must have attached CoordSystem.") if(!$cmp_cs);
01735 
01736   unless( $asm_cs->rank < $cmp_cs->rank ){
01737     throw("Assembled Slice CoordSystem->rank must be lower than ".
01738           "the component Slice Coord_system" );
01739   }
01740 
01741   my @path =
01742     @{ $asm_cs->adaptor()->get_mapping_path( $asm_cs, $cmp_cs ) };
01743 
01744   if ( !@path ) {
01745     throw("No mapping path defined between "
01746         . $asm_cs->name() . " and "
01747         . $cmp_cs->name() );
01748   }
01749 
01750   if( $asm_slice->length != $cmp_slice->length ){
01751     throw("The lengths of the assembled and component slices are not equal" );
01752   }
01753 
01754   # For now we disallow any existing mappings between the asm slice and cmp
01755   # CoordSystem and vice-versa. 
01756   # Some cases of multiple mappings may be allowable by the API, but their 
01757   # logic needs to be coded below.
01758 
01759   my $asm_proj = $asm_slice->project( $cmp_cs->name, $cmp_cs->version );
01760   if( @$asm_proj ){
01761     throw("Regions of the assembled slice are already assembled ".
01762           "into the component CoordSystem" ); 
01763   }
01764   my $cmp_proj = $cmp_slice->project( $asm_cs->name, $asm_cs->version );
01765   if( @$cmp_proj ){
01766     throw("Regions of the component slice are already assembled ".
01767           "into the assembled CoordSystem" ); 
01768   }
01769 
01770   #
01771   # Checks complete. Store the data
01772   #
01773   my $sth = $self->db->dbc->prepare
01774       ("INSERT INTO assembly " .
01775        "SET     asm_seq_region_id = ?, " .
01776        "        cmp_seq_region_id = ?, " .
01777        "        asm_start = ?, " .
01778        "        asm_end   = ?, " .
01779        "        cmp_start = ?, " .
01780        "        cmp_end   = ?, " .
01781        "        ori       = ?" );
01782 
01783   my $asm_seq_region_id = $self->get_seq_region_id( $asm_slice );
01784   my $cmp_seq_region_id = $self->get_seq_region_id( $cmp_slice );
01785   my $ori = $asm_slice->strand * $cmp_slice->strand;
01786 
01787   $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
01788   $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
01789   $sth->bind_param(3,$asm_slice->start,SQL_INTEGER);
01790   $sth->bind_param(4,$asm_slice->end,SQL_INTEGER);
01791   $sth->bind_param(5,$cmp_slice->start,SQL_INTEGER);
01792   $sth->bind_param(6,$cmp_slice->end,SQL_INTEGER);
01793   $sth->bind_param(7,$ori,SQL_INTEGER);
01794 
01795   $sth->execute();
01796 
01797   #use Data::Dumper qw( Dumper );
01798   #warn Dumper( $self->db->{seq_region_cache} );
01799   #$self->db->{seq_region_cache} = undef;
01800   #$self->_cache_seq_regions();
01801 
01802   my $ama = $self->db->get_AssemblyMapperAdaptor();
01803   $ama->delete_cache();
01804 
01805 
01806   return $asm_slice->name . "<>" . $cmp_slice->name;
01807 
01808 }
01809 
01810 
01811 =head2 prepare
01812 
01813   Arg [1]    : String $sql
01814   Example    :  ( optional )
01815   Description: overrides the default adaptor prepare method.
01816                All slice sql will usually use the dna_db.
01817   Returntype : DBD::sth 
01818   Exceptions : none
01819   Caller     : internal, convenience method
01820   Status     : Stable
01821 
01822 =cut
01823 
01824 sub prepare {
01825   my ( $self, $sql ) = @_;
01826   return $self->db()->dnadb()->dbc->prepare($sql);
01827 }
01828 
01829 sub _build_exception_cache {
01830   my $self = shift;
01831 
01832   # build up a cache of the entire assembly exception table
01833   # it should be small anyway
01834   my $sth =
01835     $self->prepare( 'SELECT ae.seq_region_id, ae.seq_region_start, '
01836               . 'ae.seq_region_end, ae.exc_type, ae.exc_seq_region_id, '
01837               . 'ae.exc_seq_region_start, ae.exc_seq_region_end '
01838               . 'FROM assembly_exception ae, '
01839               . 'seq_region sr, coord_system cs '
01840               . 'WHERE sr.seq_region_id = ae.seq_region_id '
01841               . 'AND sr.coord_system_id = cs.coord_system_id '
01842               . 'AND cs.species_id = ?' );
01843 
01844   $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
01845   $sth->execute();
01846 
01847   my %hash;
01848   $self->{'asm_exc_cache'} = \%hash;
01849 
01850   my $row;
01851   while ( $row = $sth->fetchrow_arrayref() ) {
01852     my @result = @$row;
01853     $hash{ $result[0] } ||= [];
01854     push( @{ $hash{ $result[0] } }, \@result );
01855   }
01856   $sth->finish();
01857 } ## end sub _build_exception_cache
01858 
01859 =head2 cache_toplevel_seq_mappings
01860 
01861   Args       : none
01862   Example    : $slice_adaptor->cache_toplevel_seq_mappings();
01863   Description: caches all the assembly mappings needed for genes
01864   Returntype : None
01865   Exceptions : None
01866   Caller     : general
01867   Status     : At Risk
01868              : New experimental code
01869 
01870 =cut
01871 
01872 sub cache_toplevel_seq_mappings {
01873   my ($self) = @_;
01874 
01875   # Get the sequence level to map too
01876 
01877   my $sql = (<<SSQL);
01878   SELECT    name
01879   FROM  coord_system
01880   WHERE attrib like "%sequence_level%"
01881   AND   species_id = ?
01882 SSQL
01883 
01884   my $sth = $self->prepare($sql);
01885   $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
01886   $sth->execute();
01887 
01888   my $sequence_level = $sth->fetchrow_array();
01889 
01890   $sth->finish();
01891 
01892   my $csa = $self->db->get_CoordSystemAdaptor();
01893   my $ama = $self->db->get_AssemblyMapperAdaptor();
01894 
01895   my $cs1 = $csa->fetch_by_name($sequence_level);
01896 
01897   #get level to map too.
01898 
01899   $sql = (<<LSQL);
01900   SELECT DISTINCT(cs.name)
01901   FROM  seq_region sr,
01902         seq_region_attrib sra,
01903         attrib_type at,
01904         coord_system cs
01905   WHERE sra.seq_region_id = sr.seq_region_id
01906   AND   sra.attrib_type_id = at.attrib_type_id
01907   AND   at.code = "toplevel"
01908   AND   cs.coord_system_id = sr.coord_system_id
01909   AND   cs.species_id = ?
01910 LSQL
01911 
01912   $sth = $self->prepare($sql);
01913   $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
01914   $sth->execute();
01915 
01916   while ( my $csn = $sth->fetchrow_array() ) {
01917     if ( $csn eq $sequence_level ) { next }
01918     my $cs2 = $csa->fetch_by_name($csn);
01919     my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 );
01920     $am->register_all();
01921   }
01922 
01923 } ## end sub cache_toplevel_seq_mappings
01924 
01925 
01926 sub _build_circular_slice_cache {
01927   my $self = shift;
01928 
01929   # build up a cache of circular sequence region ids
01930   my $sth =
01931         $self->prepare( "SELECT sra.seq_region_id FROM seq_region_attrib sra "
01932             . "INNER JOIN attrib_type at ON sra.attrib_type_id = at.attrib_type_id "
01933             . "INNER JOIN seq_region sr ON sra.seq_region_id = sr.seq_region_id "
01934             . "INNER JOIN coord_system cs ON sr.coord_system_id = cs.coord_system_id "
01935             . "WHERE code = 'circular_seq' and cs.species_id = ?");
01936 
01937   $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
01938   $sth->execute();
01939 
01940   my $id;
01941   my %hash;
01942   if ( ($id) = $sth->fetchrow_array() ) {
01943     $self->{'circular_sr_id_cache'} = \%hash;
01944         $self->{'is_circular'} = 1;
01945     $hash{ $id } = $id;
01946     while ( ($id) = $sth->fetchrow_array() ) {
01947             $hash{ $id } = $id;
01948     }
01949   } else {
01950     $self->{'is_circular'} = 0;
01951   }
01952   $sth->finish();
01953 } ## end _build_circular_slice_cache
01954 
01955 
01956 #####################################
01957 # sub DEPRECATED METHODs
01958 #####################################
01959 
01960 =head2 fetch_by_mapfrag
01961 
01962  Function: DEPRECATED use fetch_by_misc_feature_attribute('synonym',$mapfrag)
01963 
01964 =cut
01965 
01966 sub fetch_by_mapfrag{
01967    my ($self,$mymapfrag,$flag,$size) = @_;
01968    deprecate('Use fetch_by_misc_feature_attribute instead');
01969    $flag ||= 'fixed-width'; # alt.. 'context'
01970    $size ||= $flag eq 'fixed-width' ? 100000 : 0;
01971    return $self->fetch_by_misc_feature_attribute('synonym',$mymapfrag,$size);
01972 }
01973 
01974 
01975 
01976 =head2 fetch_by_chr_start_end
01977 
01978   Description: DEPRECATED use fetch_by_region instead
01979 
01980 =cut
01981 
01982 sub fetch_by_chr_start_end {
01983   my ($self,$chr,$start,$end) = @_;
01984   deprecate('Use fetch_by_region() instead');
01985 
01986   #assume that by chromosome the user actually meant top-level coord
01987   #system since this is the old behaviour of this deprecated method
01988   my $csa = $self->db->get_CoordSystemAdaptor();
01989   my ($cs) = @{$csa->fetch_all()}; # get the highest coord system
01990 
01991   return $self->fetch_by_region($cs->name,$chr,$start,$end,1,$cs->version);
01992 }
01993 
01994 
01995 
01996 =head2 fetch_by_contig_name
01997 
01998   Description: Deprecated. Use fetch_by_region(), Slice::project(),
01999                Slice::expand() instead
02000 
02001 =cut
02002 
02003 sub fetch_by_contig_name {
02004   my ($self, $name, $size) = @_;
02005 
02006   deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
02007 
02008   #previously wanted chromosomal slice on a given contig.  Assume this means
02009   #a top-level slice on a given seq_region in the seq_level coord system
02010   my $csa = $self->db()->get_CoordSystemAdaptor();
02011   my $seq_level = $csa->fetch_sequence_level();
02012 
02013   my $seq_lvl_slice = $self->fetch_by_region($seq_level->name(), $name);
02014 
02015   if(!$seq_lvl_slice) {
02016     return undef;
02017   }
02018 
02019   my @projection = @{$seq_lvl_slice->project('toplevel')};
02020 
02021   if(@projection != 1) {
02022     warning("$name is mapped to multiple toplevel locations.");
02023   }
02024 
02025   return $projection[0]->[2]->expand($size, $size);
02026 }
02027 
02028 
02029 =head2 fetch_by_clone_accession
02030 
02031   Description: DEPRECATED.  Use fetch_by_region, Slice::project, Slice::expand
02032                instead.
02033 
02034 =cut
02035 
02036 sub fetch_by_clone_accession{
02037   my ($self,$name,$size) = @_;
02038 
02039   deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
02040 
02041   my $csa = $self->db()->get_CoordSystemAdaptor();
02042   my $clone_cs = $csa->fetch_by_name('clone');
02043 
02044   if(!$clone_cs) {
02045     warning('Clone coordinate system does not exist for this species');
02046     return undef;
02047   }
02048 
02049   #this unfortunately needs a version on the end to work
02050   if(! ($name =~ /\./)) {
02051     my $sth =
02052       $self->prepare(  "SELECT sr.name "
02053                      . "FROM   seq_region sr, coord_system cs "
02054                      . "WHERE  cs.name = 'clone' "
02055                      . "AND    cs.coord_system_id = sr.coord_system_id "
02056                      . "AND    sr.name LIKE '$name.%'"
02057                      . "AND    cs.species_id = ?" );
02058 
02059     $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
02060     $sth->execute();
02061 
02062     if(!$sth->rows()) {
02063       $sth->finish();
02064       throw("Clone $name not found in database");
02065     }
02066 
02067     ($name) = $sth->fetchrow_array();
02068 
02069     $sth->finish();
02070   }
02071 
02072   my $clone = $self->fetch_by_region($clone_cs->name(), $name);
02073   return undef if(!$clone);
02074 
02075   my @projection = @{$clone->project('toplevel')};
02076 
02077   if(@projection != 1) {
02078     warning("$name is mapped to multiple locations.");
02079   }
02080 
02081   return $projection[0]->[2]->expand($size, $size);
02082 }
02083 
02084 
02085 =head2 fetch_by_supercontig_name
02086 
02087   Description: DEPRECATED. Use fetch_by_region(), Slice::project() and
02088                Slice::expand() instead
02089 
02090 =cut
02091 
02092 sub fetch_by_supercontig_name {
02093   my ($self,$name, $size) = @_;
02094 
02095   deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
02096 
02097   my $csa = $self->db()->get_CoordSystemAdaptor();
02098   my $sc_level = $csa->fetch_by_name('supercontig');
02099 
02100   if(!$sc_level) {
02101     warning('No supercontig coordinate system exists for this species.');
02102     return undef;
02103   }
02104 
02105   my $sc_slice = $self->fetch_by_region($sc_level->name(),$name);
02106 
02107   return undef if(!$sc_slice);
02108 
02109   my @projection = @{$sc_slice->project('toplevel')};
02110 
02111   if(@projection > 1) {
02112     warning("$name is mapped to multiple locations in toplevel");
02113   }
02114 
02115   return $projection[0]->[2]->expand($size, $size);
02116 }
02117 
02118 
02119 
02120 
02121 =head2 list_overlapping_supercontigs
02122 
02123   Description: DEPRECATED use Slice::project instead
02124 
02125 =cut
02126 
02127 sub list_overlapping_supercontigs {
02128    my ($self,$slice) = @_;
02129 
02130    deprecate('Use Slice::project() instead.');
02131 
02132    my $csa = $self->db()->get_CoordSystemAdaptor();
02133    my $sc_level = $csa->fetch_by_name('supercontig');
02134 
02135    if(!$sc_level) {
02136      warning('No supercontig coordinate system exists for this species.');
02137      return undef;
02138    }
02139 
02140    my @out;
02141    foreach my $seg (@{$slice->project($sc_level->name(), $sc_level->version)}){
02142      push @out, $seg->[2]->seq_region_name();
02143    }
02144 
02145    return \@out;
02146 }
02147 
02148 
02149 
02150 =head2 fetch_by_chr_name
02151 
02152   Description: DEPRECATED. Use fetch by region instead
02153 
02154 =cut
02155 
02156 sub fetch_by_chr_name{
02157    my ($self,$chr_name) = @_;
02158    deprecate('Use fetch_by_region() instead.');
02159 
02160    my $csa = $self->db->get_CoordSystemAdaptor();
02161 
02162    my $top_cs = @{$csa->fetch_all()};
02163 
02164    return $self->fetch_by_region($top_cs->name(),$chr_name,
02165                                  undef,undef,undef,$top_cs->version);
02166 }
02167 
02168 
02169 
02170 
02171 
02172 
02173 1;