Archive Ensembl HomeArchive Ensembl Home
Slice.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::Slice - Arbitary Slice of a genome
00024 
00025 =head1 SYNOPSIS
00026 
00027   $sa = $db->get_SliceAdaptor;
00028 
00029   $slice =
00030     $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
00031 
00032   # get some attributes of the slice
00033   my $seqname = $slice->seq_region_name();
00034   my $start   = $slice->start();
00035   my $end     = $slice->end();
00036 
00037   # get the sequence from the slice
00038   my $seq = $slice->seq();
00039 
00040   # get some features from the slice
00041   foreach $gene ( @{ $slice->get_all_Genes } ) {
00042     # do something with a gene
00043   }
00044 
00045   foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
00046     # do something with dna-dna alignments
00047   }
00048 
00049 =head1 DESCRIPTION
00050 
00051 A Slice object represents a region of a genome.  It can be used to retrieve
00052 sequence or features from an area of interest.
00053 
00054 =head1 METHODS
00055 
00056 =cut
00057 
00058 package Bio::EnsEMBL::Slice;
00059 use vars qw(@ISA);
00060 use strict;
00061 
00062 use Bio::PrimarySeqI;
00063 
00064 
00065 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00066 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
00067 use Bio::EnsEMBL::RepeatMaskedSlice;
00068 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
00069 use Bio::EnsEMBL::ProjectionSegment;
00070 use Bio::EnsEMBL::Registry;
00071 use Bio::EnsEMBL::DBSQL::MergedAdaptor;
00072 
00073 use Bio::EnsEMBL::StrainSlice;
00074 #use Bio::EnsEMBL::IndividualSlice;
00075 #use Bio::EnsEMBL::IndividualSliceFactory;
00076 use Bio::EnsEMBL::Mapper::RangeRegistry;
00077 use Bio::EnsEMBL::SeqRegionSynonym;
00078 use Scalar::Util qw(weaken isweak);
00079 
00080 # use Data::Dumper;
00081 
00082 my $reg = "Bio::EnsEMBL::Registry";
00083 
00084 @ISA = qw(Bio::PrimarySeqI);
00085 
00086 
00087 =head2 new
00088 
00089   Arg [...]  : List of named arguments
00090                Bio::EnsEMBL::CoordSystem COORD_SYSTEM
00091                string SEQ_REGION_NAME,
00092                int    START,
00093                int    END,
00094                int    SEQ_REGION_LENGTH, (optional)
00095                string SEQ (optional)
00096                int    STRAND, (optional, defaults to 1)
00097                Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
00098   Example    : $slice = Bio::EnsEMBL::Slice->new(-coord_system => $cs,
00099                                                  -start => 1,
00100                                                  -end => 10000,
00101                                                  -strand => 1,
00102                                                  -seq_region_name => 'X',
00103                                                  -seq_region_length => 12e6,
00104                                                  -adaptor => $slice_adaptor);
00105   Description: Creates a new slice object.  A slice represents a region
00106                of sequence in a particular coordinate system.  Slices can be
00107                used to retrieve sequence and features from an area of
00108                interest in a genome.
00109 
00110                Coordinates start at 1 and are inclusive.  Negative
00111                coordinates or coordinates exceeding the length of the
00112                seq_region are permitted.  Start must be less than or equal.
00113                to end regardless of the strand.
00114 
00115                Slice objects are immutable. Once instantiated their attributes
00116                (with the exception of the adaptor) may not be altered.  To
00117                change the attributes a new slice must be created.
00118   Returntype : Bio::EnsEMBL::Slice
00119   Exceptions : throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
00120   Caller     : general, Bio::EnsEMBL::SliceAdaptor
00121   Status     : Stable
00122 
00123 =cut
00124 
00125 sub new {
00126   my $caller = shift;
00127 
00128   #new can be called as a class or object method
00129   my $class = ref($caller) || $caller;
00130 
00131   my ($seq, $coord_system, $seq_region_name, $seq_region_length,
00132       $start, $end, $strand, $adaptor, $empty) =
00133         rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
00134                       START END STRAND ADAPTOR EMPTY)], @_);
00135 
00136   #empty is only for backwards compatibility
00137   if ($empty) {
00138     deprecate(   "Creation of empty slices is no longer needed"
00139                . "and is deprecated" );
00140      my $self = bless( { 'empty' => 1 }, $class );
00141     $self->adaptor($adaptor);
00142     return $self;
00143   }
00144 
00145   if ( !defined($seq_region_name) ) {
00146     throw('SEQ_REGION_NAME argument is required');
00147   }
00148   if ( !defined($start) ) { throw('START argument is required') }
00149   if ( !defined($end) )   { throw('END argument is required') }
00150 
00151   ## if ( $start > $end + 1 ) {
00152   ##   throw('start must be less than or equal to end+1');
00153   ## }
00154 
00155   if ( !defined($seq_region_length) ) { $seq_region_length = $end }
00156 
00157   if ( $seq_region_length <= 0 ) {
00158     throw('SEQ_REGION_LENGTH must be > 0');
00159   }
00160 
00161   if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
00162     throw('SEQ must be the same length as the defined LENGTH not '
00163         . CORE::length($seq)
00164         . ' compared to '
00165         . ( $end - $start + 1 ) );
00166   }
00167 
00168   if(defined($coord_system)) {
00169    if(!ref($coord_system) || !$coord_system->isa('Bio::EnsEMBL::CoordSystem')){
00170      throw('COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
00171    }
00172    if($coord_system->is_top_level()) {
00173      throw('Cannot create slice on toplevel CoordSystem.');
00174    }
00175   } else {
00176    warning("Slice without coordinate system");
00177    #warn(stack_trace_dump());
00178   }
00179 
00180   $strand ||= 1;
00181 
00182   if($strand != 1 && $strand != -1) {
00183     throw('STRAND argument must be -1 or 1');
00184   }
00185 
00186   if(defined($adaptor)) {
00187     if(!ref($adaptor) || !$adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
00188       throw('ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
00189     }
00190   }
00191 
00192   my $self = bless {'coord_system'      => $coord_system,
00193                 'seq'               => $seq,
00194                 'seq_region_name'   => $seq_region_name,
00195                 'seq_region_length' => $seq_region_length,
00196                 'start'             => int($start),
00197                 'end'               => int($end),
00198                 'strand'            => $strand}, $class;
00199 
00200   $self->adaptor($adaptor);
00201 
00202   return $self;
00203 
00204 }
00205 
00206 =head2 new_fast
00207 
00208   Arg [1]    : hashref to be blessed
00209   Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
00210   Exceptions : none
00211   Returntype : Bio::EnsEMBL::Slice
00212   Caller     : general
00213   Status     : Stable
00214 
00215 =cut
00216 
00217 
00218 sub new_fast {
00219   my $class = shift;
00220   my $hashref = shift;
00221   my $self = bless $hashref, $class;
00222   weaken($self->{adaptor})  if ( ! isweak($self->{adaptor}) );
00223   return $self;
00224 }
00225 
00226 =head2 adaptor
00227 
00228   Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::SliceAdaptor $adaptor
00229   Example    : $adaptor = $slice->adaptor();
00230   Description: Getter/Setter for the slice object adaptor used
00231                by this slice for database interaction.
00232   Returntype : Bio::EnsEMBL::DBSQL::SliceAdaptor
00233   Exceptions : thorws if argument passed is not a SliceAdaptor
00234   Caller     : general
00235   Status     : Stable
00236 
00237 =cut
00238 
00239 sub adaptor{
00240    my $self = shift;
00241 
00242    if(@_) {
00243      my $ad = shift;
00244      if(defined($ad)) {
00245        if(!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
00246          throw('Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
00247        }
00248      }
00249      weaken($self->{'adaptor'} = $ad);
00250    }
00251 
00252    return $self->{'adaptor'};
00253 }
00254 
00255 
00256 
00257 =head2 seq_region_name
00258 
00259   Arg [1]    : none
00260   Example    : $seq_region = $slice->seq_region_name();
00261   Description: Returns the name of the seq_region that this slice is on. For
00262                example if this slice is in chromosomal coordinates the
00263                seq_region_name might be 'X' or '10'.
00264 
00265                This function was formerly named chr_name, but since slices can
00266                now be on coordinate systems other than chromosomal it has been
00267                changed.
00268   Returntype : string
00269   Exceptions : none
00270   Caller     : general
00271   Status     : Stable
00272 
00273 =cut
00274 
00275 sub seq_region_name {
00276   my $self = shift;
00277   return $self->{'seq_region_name'};
00278 }
00279 
00280 
00281 
00282 =head2 seq_region_length
00283 
00284   Arg [1]    : none
00285   Example    : $seq_region_length = $slice->seq_region_length();
00286   Description: Returns the length of the entire seq_region that this slice is
00287                on. For example if this slice is on a chromosome this will be
00288                the length (in basepairs) of the entire chromosome.
00289   Returntype : int
00290   Exceptions : none
00291   Caller     : general
00292   Status     : Stable
00293 
00294 =cut
00295 
00296 sub seq_region_length {
00297   my $self = shift;
00298   return $self->{'seq_region_length'};
00299 }
00300 
00301 
00302 =head2 coord_system
00303 
00304   Arg [1]    : none
00305   Example    : print $slice->coord_system->name();
00306   Description: Returns the coordinate system that this slice is on.
00307   Returntype : Bio::EnsEMBL::CoordSystem
00308   Exceptions : none
00309   Caller     : general
00310   Status     : Stable
00311 
00312 =cut
00313 
00314 sub coord_system {
00315   my $self = shift;
00316   return $self->{'coord_system'};
00317 }
00318 
00319 =head2 coord_system_name
00320 
00321   Arg [1]    : none
00322   Example    : print $slice->coord_system_name()
00323   Description: Convenience method.  Gets the name of the coord_system which
00324                this slice is on.
00325                Returns undef if this Slice does not have an attached
00326                CoordSystem.
00327   Returntype: string or undef
00328   Exceptions: none
00329   Caller    : general
00330   Status     : Stable
00331 
00332 =cut
00333 
00334 sub coord_system_name {
00335   my $self = shift;
00336   my $csystem = $self->{'coord_system'};
00337   return ($csystem) ? $csystem->name() : undef;
00338 }
00339 
00340 
00341 =head2 centrepoint
00342 
00343   Arg [1]    : none
00344   Example    : $cp = $slice->centrepoint();
00345   Description: Returns the mid position of this slice relative to the
00346                start of the sequence region that it was created on.
00347                Coordinates are inclusive and start at 1.
00348   Returntype : int
00349   Exceptions : none
00350   Caller     : general
00351   Status     : Stable
00352 
00353 =cut
00354 
00355 sub centrepoint {
00356   my $self = shift;
00357   return ($self->{'start'}+$self->{'end'})/2;
00358 }
00359 
00360 =head2 start
00361 
00362   Arg [1]    : none
00363   Example    : $start = $slice->start();
00364   Description: Returns the start position of this slice relative to the
00365                start of the sequence region that it was created on.
00366                Coordinates are inclusive and start at 1.  Negative coordinates
00367                or coordinates exceeding the length of the sequence region are
00368                permitted.  Start is always less than or equal to end
00369                regardless of the orientation of the slice.
00370   Returntype : int
00371   Exceptions : none
00372   Caller     : general
00373   Status     : Stable
00374 
00375 =cut
00376 
00377 sub start {
00378   my $self = shift;
00379   return $self->{'start'};
00380 }
00381 
00382 
00383 
00384 =head2 end
00385 
00386   Arg [1]    : none
00387   Example    : $end = $slice->end();
00388   Description: Returns the end position of this slice relative to the
00389                start of the sequence region that it was created on.
00390                Coordinates are inclusive and start at 1.  Negative coordinates
00391                or coordinates exceeding the length of the sequence region are
00392                permitted.  End is always greater than or equal to start
00393                regardless of the orientation of the slice.
00394   Returntype : int
00395   Exceptions : none
00396   Caller     : general
00397   Status     : Stable
00398 
00399 =cut
00400 
00401 sub end {
00402   my $self = shift;
00403   return $self->{'end'};
00404 }
00405 
00406 
00407 
00408 =head2 strand
00409 
00410   Arg [1]    : none
00411   Example    : $strand = $slice->strand();
00412   Description: Returns the orientation of this slice on the seq_region it has
00413                been created on
00414   Returntype : int (either 1 or -1)
00415   Exceptions : none
00416   Caller     : general, invert
00417   Status     : Stable
00418 
00419 =cut
00420 
00421 sub strand{
00422   my $self = shift;
00423   return $self->{'strand'};
00424 }
00425 
00426 
00427 
00428 
00429 
00430 =head2 name
00431 
00432   Arg [1]    : none
00433   Example    : my $results = $cache{$slice->name()};
00434   Description: Returns the name of this slice. The name is formatted as a colon
00435                delimited string with the following attributes:
00436                coord_system:version:seq_region_name:start:end:strand
00437 
00438                Slices with the same name are equivalent and thus the name can
00439                act as a hash key.
00440   Returntype : string
00441   Exceptions : none
00442   Caller     : general
00443   Status     : Stable
00444 
00445 =cut
00446 
00447 sub name {
00448   my $self = shift;
00449 
00450   my $cs = $self->{'coord_system'};
00451 
00452   return join(':',
00453               ($cs) ? $cs->name()    : '',
00454               ($cs) ? $cs->version() : '',
00455               $self->{'seq_region_name'},
00456               $self->{'start'},
00457               $self->{'end'},
00458               $self->{'strand'});
00459 }
00460 
00461 
00462 
00463 =head2 length
00464 
00465   Arg [1]    : none
00466   Example    : $length = $slice->length();
00467   Description: Returns the length of this slice in basepairs
00468   Returntype : int
00469   Exceptions : none
00470   Caller     : general
00471   Status     : Stable
00472 
00473 =cut
00474 
00475 sub length {
00476   my ($self) = @_;
00477 
00478   my $length = $self->{'end'} - $self->{'start'} + 1;
00479  
00480   if ( $self->{'start'} > $self->{'end'} && $self->is_circular() ) {    
00481      $length += $self->{'seq_region_length'};
00482   }
00483 
00484   return $length;
00485 }
00486 
00487 =head2 is_reference
00488   Arg        : none
00489   Example    : my $reference = $slice->is_reference()
00490   Description: Returns 1 if slice is a reference  slice else 0
00491   Returntype : int
00492   Caller     : general
00493   Status     : At Risk
00494 
00495 =cut
00496 
00497 sub is_reference {
00498   my ($self) = @_;
00499 
00500   if ( !defined( $self->{'is_reference'} ) ) {
00501     $self->{'is_reference'} =
00502       $self->adaptor()->is_reference( $self->get_seq_region_id() );
00503   }
00504 
00505   return $self->{'is_reference'};
00506 }
00507 
00508 =head2 is_toplevel
00509   Arg        : none
00510   Example    : my $top = $slice->is_toplevel()
00511   Description: Returns 1 if slice is a toplevel slice else 0
00512   Returntype : int
00513   Caller     : general
00514   Status     : At Risk
00515 
00516 =cut
00517 
00518 sub is_toplevel {
00519   my ($self) = @_;
00520 
00521   if ( !defined( $self->{'toplevel'} ) ) {
00522     $self->{'toplevel'} =
00523       $self->adaptor()->is_toplevel( $self->get_seq_region_id() );
00524   }
00525 
00526   return $self->{'toplevel'};
00527 }
00528 
00529 =head2 is_circular
00530   Arg        : none
00531   Example    : my $circ = $slice->is_circular()
00532   Description: Returns 1 if slice is a circular slice else 0
00533   Returntype : int
00534   Caller     : general
00535   Status     : At Risk
00536 
00537 =cut
00538 
00539 sub is_circular {
00540   my ($self) = @_;
00541 
00542   if ( !defined( $self->adaptor() ) ) { return 0 }
00543 
00544   if ( !defined( $self->{'circular'} ) ) {
00545     $self->{'circular'} = 0;  
00546 
00547     if ( !defined($self->adaptor()->{'is_circular'}) ){   
00548         $self->adaptor()->_build_circular_slice_cache();
00549     } 
00550 
00551     if ($self->adaptor()->{'is_circular'}) {
00552     if ( exists($self->adaptor()->{'circular_sr_id_cache'}->{ $self->adaptor()->get_seq_region_id($self) } ) ) {
00553         $self->{'circular'} = 1;
00554     }
00555     }
00556   }
00557   return $self->{'circular'};
00558 }
00559 
00560 =head2 invert
00561 
00562   Arg [1]    : none
00563   Example    : $inverted_slice = $slice->invert;
00564   Description: Creates a copy of this slice on the opposite strand and
00565                returns it.
00566   Returntype : Bio::EnsEMBL::Slice
00567   Exceptions : none
00568   Caller     : general
00569   Status     : Stable
00570 
00571 =cut
00572 
00573 sub invert {
00574   my $self = shift;
00575 
00576   # make a shallow copy of the slice via a hash copy and flip the strand
00577   my %s = %$self;
00578   $s{'strand'} = $self->{'strand'} * -1;
00579 
00580   # reverse compliment any attached sequence
00581   reverse_comp(\$s{'seq'}) if($s{'seq'});
00582 
00583   # bless and return the copy
00584   return  bless \%s, ref $self;
00585 }
00586 
00587 
00588 
00589 =head2 seq
00590 
00591   Arg [1]    : none
00592   Example    : print "SEQUENCE = ", $slice->seq();
00593   Description: Returns the sequence of the region represented by this
00594                slice formatted as a string.
00595   Returntype : string
00596   Exceptions : none
00597   Caller     : general
00598   Status     : Stable
00599 
00600 =cut
00601 
00602 sub seq {
00603   my $self = shift;
00604 
00605   # special case for in-between (insert) coordinates
00606   return '' if($self->start() == $self->end() + 1);
00607 
00608   return $self->{'seq'} if($self->{'seq'});
00609 
00610   if($self->adaptor()) {
00611     my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
00612     return ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1)};
00613   }
00614 
00615   # no attached sequence, and no db, so just return Ns
00616   return 'N' x $self->length();
00617 }
00618 
00619 
00620 
00621 =head2 subseq
00622 
00623   Arg  [1]   : int $startBasePair
00624                relative to start of slice, which is 1.
00625   Arg  [2]   : int $endBasePair
00626                relative to start of slice.
00627   Arg  [3]   : (optional) int $strand
00628                The strand of the slice to obtain sequence from. Default
00629                value is 1.
00630   Description: returns string of dna sequence
00631   Returntype : txt
00632   Exceptions : end should be at least as big as start
00633                strand must be set
00634   Caller     : general
00635   Status     : Stable
00636 
00637 =cut
00638 
00639 sub subseq {
00640   my ( $self, $start, $end, $strand ) = @_;
00641 
00642   if ( $end+1 < $start ) {
00643     throw("End coord + 1 is less then start coord");
00644   }
00645 
00646   # handle 'between' case for insertions
00647   return '' if( $start == $end + 1);
00648 
00649   $strand = 1 unless(defined $strand);
00650 
00651   if ( $strand != -1 && $strand != 1 ) {
00652     throw("Invalid strand [$strand] in call to Slice::subseq.");
00653   }
00654   my $subseq;
00655   if($self->adaptor){
00656     my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
00657     $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
00658       ( $self, $start,
00659         $end, $strand )};
00660   } else {
00661     ## check for gap at the beginning and pad it with Ns
00662     if ($start < 1) {
00663       $subseq = "N" x (1 - $start);
00664       $start = 1;
00665     }
00666     $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
00667     ## check for gap at the end and pad it with Ns
00668     if ($end > $self->length()) {
00669       $subseq .= "N" x ($end - $self->length());
00670     }
00671     reverse_comp(\$subseq) if($strand == -1);
00672   }
00673   return $subseq;
00674 }
00675 
00676 =head2 assembly_exception_type
00677 
00678   Example     : $self->assembly_exception_type(); 
00679   Description : Returns the type of slice this is. If it is reference then you
00680                 will get 'REF' back. Otherwise you will get the first
00681                 element from C<get_all_AssemblyExceptionFeatures()>. If no
00682                 assembly exception exists you will get an empty string back.
00683   Returntype  : String
00684   Exceptions  : None
00685   Caller      : Public
00686   Status      : Beta
00687 
00688 =cut
00689 
00690 sub assembly_exception_type {
00691   my ($self) = @_;
00692   my $type = q{};
00693   if($self->is_reference()) {
00694     $type = 'REF';
00695   }
00696   else {
00697     my $assembly_exceptions = $self->get_all_AssemblyExceptionFeatures();
00698     if(@{$assembly_exceptions}) {
00699       $type = $assembly_exceptions->[0]->type();
00700     }
00701   }
00702   return $type;
00703 }
00704 
00705 
00706 =head2 get_base_count
00707 
00708   Arg [1]    : none
00709   Example    : $c_count = $slice->get_base_count->{'c'};
00710   Description: Retrieves a hashref containing the counts of each bases in the
00711                sequence spanned by this slice.  The format of the hash is :
00712                { 'a' => num,
00713                  'c' => num,
00714                  't' => num,
00715                  'g' => num,
00716                  'n' => num,
00717                  '%gc' => num }
00718 
00719                All bases which are not in the set [A,a,C,c,T,t,G,g] are
00720                included in the 'n' count.  The 'n' count could therefore be
00721                inclusive of ambiguity codes such as 'y'.
00722                The %gc is the ratio of GC to AT content as in:
00723                total(GC)/total(ACTG) * 100
00724                This function is conservative in its memory usage and scales to
00725                work for entire chromosomes.
00726   Returntype : hashref
00727   Exceptions : none
00728   Caller     : general
00729   Status     : Stable
00730 
00731 =cut
00732 
00733 sub get_base_count {
00734   my $self = shift;
00735 
00736   my $a = 0;
00737   my $c = 0;
00738   my $t = 0;
00739   my $g = 0;
00740 
00741   my $start = 1;
00742   my $end;
00743 
00744   my $RANGE = 100_000;
00745   my $len   = $self->length();
00746 
00747   my $seq;
00748 
00749   while ( $start <= $len ) {
00750     $end = $start + $RANGE - 1;
00751     $end = $len if ( $end > $len );
00752 
00753     $seq = $self->subseq( $start, $end );
00754 
00755     $a += $seq =~ tr/Aa//;
00756     $c += $seq =~ tr/Cc//;
00757     $t += $seq =~ tr/Tt//;
00758     $g += $seq =~ tr/Gg//;
00759 
00760     $start = $end + 1;
00761   }
00762 
00763   my $actg = $a + $c + $t + $g;
00764 
00765   my $gc_content = 0;
00766   if ( $actg > 0 ) {    # Avoid dividing by 0
00767     $gc_content = sprintf( "%1.2f", ( ( $g + $c )/$actg )*100 );
00768   }
00769 
00770   return { 'a'   => $a,
00771            'c'   => $c,
00772            't'   => $t,
00773            'g'   => $g,
00774            'n'   => $len - $actg,
00775            '%gc' => $gc_content };
00776 }
00777 
00778 
00779 
00780 =head2 project
00781 
00782   Arg [1]    : string $name
00783                The name of the coordinate system to project this slice onto
00784   Arg [2]    : string $version
00785                The version of the coordinate system (such as 'NCBI34') to
00786                project this slice onto
00787   Example    :
00788     my $clone_projection = $slice->project('clone');
00789 
00790     foreach my $seg (@$clone_projection) {
00791       my $clone = $segment->to_Slice();
00792       print $slice->seq_region_name(), ':', $seg->from_start(), '-',
00793             $seg->from_end(), ' -> ',
00794             $clone->seq_region_name(), ':', $clone->start(), '-',$clone->end(),
00795             $clone->strand(), "\n";
00796     }
00797   Description: Returns the results of 'projecting' this slice onto another
00798                coordinate system.  Projecting to a coordinate system that
00799                the slice is assembled from is analagous to retrieving a tiling
00800                path.  This method may also be used to 'project up' to a higher
00801                level coordinate system, however.
00802 
00803                This method returns a listref of triplets [start,end,slice]
00804                which represents the projection.  The start and end defined the
00805                region of this slice which is made up of the third value of
00806                the triplet: a slice in the requested coordinate system.
00807   Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
00808                can also be used as [$start,$end,$slice] triplets
00809   Exceptions : none
00810   Caller     : general
00811   Status     : Stable
00812 
00813 =cut
00814 
00815 sub project {
00816   my $self = shift;
00817   my $cs_name = shift;
00818   my $cs_version = shift;
00819 
00820   throw('Coord_system name argument is required') if(!$cs_name);
00821 
00822   my $slice_adaptor = $self->adaptor();
00823 
00824   if(!$slice_adaptor) {
00825     warning("Cannot project without attached adaptor.");
00826     return [];
00827   }
00828 
00829   if(!$self->coord_system()) {
00830     warning("Cannot project without attached coord system.");
00831     return [];
00832   }
00833 
00834 
00835   my $db = $slice_adaptor->db();
00836   my $csa = $db->get_CoordSystemAdaptor();
00837   my $cs = $csa->fetch_by_name($cs_name, $cs_version);
00838   my $slice_cs = $self->coord_system();
00839 
00840   if(!$cs) {
00841     throw("Cannot project to unknown coordinate system " .
00842           "[$cs_name $cs_version]");
00843   }
00844 
00845   # no mapping is needed if the requested coord system is the one we are in
00846   # but we do need to check if some of the slice is outside of defined regions
00847   if($slice_cs->equals($cs)) {
00848     return $self->_constrain_to_region();
00849   }
00850 
00851   my @projection;
00852   my $current_start = 1;
00853 
00854   # decompose this slice into its symlinked components.
00855   # this allows us to handle haplotypes and PARs
00856   my $normal_slice_proj =
00857     $slice_adaptor->fetch_normalized_slice_projection($self);
00858   foreach my $segment (@$normal_slice_proj) {
00859     my $normal_slice = $segment->[2];
00860 
00861     $slice_cs = $normal_slice->coord_system();
00862 
00863     my $asma = $db->get_AssemblyMapperAdaptor();
00864     my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
00865 
00866     # perform the mapping between this slice and the requested system
00867     my @coords;
00868 
00869     if( defined $asm_mapper ) {
00870      @coords = $asm_mapper->map($normal_slice->seq_region_name(),
00871                  $normal_slice->start(),
00872                  $normal_slice->end(),
00873                  $normal_slice->strand(),
00874                  $slice_cs);
00875     } else {
00876       $coords[0] = Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
00877                            $normal_slice->end());
00878     }
00879 
00880 
00881     # my $last_rank = 0;
00882     #construct a projection from the mapping results and return it
00883     foreach my $coord (@coords) {
00884       my $coord_start  = $coord->start();
00885       my $coord_end    = $coord->end();
00886       my $length       = $coord_end - $coord_start + 1;
00887 
00888       if ( $coord_start > $coord_end ) {
00889         $length =
00890           $normal_slice->seq_region_length() -
00891           $coord_start +
00892           $coord_end + 1;
00893       }
00894 
00895 #      if( $last_rank != $coord->rank){
00896 #   $current_start = 1;
00897 #   print "LAST rank has changed to ".$coord->rank."from $last_rank \n";
00898 #     }
00899 #      $last_rank = $coord->rank;
00900 
00901       #skip gaps
00902       if($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
00903 
00904         my $coord_cs     = $coord->coord_system();
00905 
00906         # If the normalised projection just ended up mapping to the
00907         # same coordinate system we were already in then we should just
00908         # return the original region.  This can happen for example, if we
00909         # were on a PAR region on Y which refered to X and a projection to
00910         # 'toplevel' was requested.
00911         if($coord_cs->equals($slice_cs)) {
00912           # trim off regions which are not defined
00913           return $self->_constrain_to_region();
00914         }
00915     #create slices for the mapped-to coord system
00916         my $slice = $slice_adaptor->fetch_by_seq_region_id(
00917                                                     $coord->id(),
00918                                                     $coord_start,
00919                                                     $coord_end,
00920                                                     $coord->strand());
00921 
00922     my $current_end = $current_start + $length - 1;
00923 
00924     if ($current_end > $slice->seq_region_length() && $slice->is_circular ) {
00925         $current_end -= $slice->seq_region_length();
00926         }
00927 
00928         push @projection, bless([$current_start, $current_end, $slice],
00929                                 "Bio::EnsEMBL::ProjectionSegment");
00930       }
00931 
00932       $current_start += $length;
00933     }
00934   }
00935 
00936   return \@projection;
00937 }
00938 
00939 
00940 sub _constrain_to_region {
00941   my $self = shift;
00942 
00943   my $entire_len = $self->seq_region_length();
00944 
00945   #if the slice has negative coordinates or coordinates exceeding the
00946   #exceeding length of the sequence region we want to shrink the slice to
00947   #the defined region
00948 
00949   if($self->{'start'} > $entire_len || $self->{'end'} < 1) {
00950     #none of this slice is in a defined region
00951     return [];
00952   }
00953 
00954   my $right_contract = 0;
00955   my $left_contract  = 0;
00956   if($self->{'end'} > $entire_len) {
00957     $right_contract = $entire_len - $self->{'end'};
00958   }
00959   if($self->{'start'} < 1) {
00960     $left_contract = $self->{'start'} - 1;
00961   }
00962 
00963   my $new_slice;
00964   if($left_contract || $right_contract) {
00965       #if slice in negative strand, need to swap contracts
00966       if ($self->strand == 1) {
00967       $new_slice = $self->expand($left_contract, $right_contract);
00968       }
00969       elsif ($self->strand == -1) {
00970       $new_slice = $self->expand($right_contract, $left_contract);
00971       }
00972   } else {
00973     $new_slice = $self;
00974   }
00975 
00976   return [bless [1-$left_contract, $self->length()+$right_contract,
00977                  $new_slice], "Bio::EnsEMBL::ProjectionSegment" ];
00978 }
00979 
00980 
00981 =head2 expand
00982 
00983   Arg [1]    : (optional) int $five_prime_expand
00984                The number of basepairs to shift this slices five_prime
00985                coordinate by.  Positive values make the slice larger,
00986                negative make the slice smaller.
00987                coordinate left.
00988                Default = 0.
00989   Arg [2]    : (optional) int $three_prime_expand
00990                The number of basepairs to shift this slices three_prime
00991                coordinate by. Positive values make the slice larger,
00992                negative make the slice smaller.
00993                Default = 0.
00994   Arg [3]    : (optional) bool $force_expand
00995                if set to 1, then the slice will be contracted even in the case
00996                when shifts $five_prime_expand and $three_prime_expand overlap.
00997                In that case $five_prime_expand and $three_prime_expand will be set
00998                to a maximum possible number and that will result in the slice
00999                which would have only 2pbs.
01000                Default = 0.
01001   Arg [4]    : (optional) int* $fpref
01002                The reference to a number of basepairs to shift this slices five_prime
01003                coordinate by. Normally it would be set to $five_prime_expand.
01004                But in case when $five_prime_expand shift can not be applied and
01005                $force_expand is set to 1, then $$fpref will contain the maximum possible
01006                shift
01007   Arg [5]    : (optional) int* $tpref
01008                The reference to a number of basepairs to shift this slices three_prime
01009                coordinate by. Normally it would be set to $three_prime_expand.
01010                But in case when $five_prime_expand shift can not be applied and
01011                $force_expand is set to 1, then $$tpref will contain the maximum possible
01012                shift
01013   Example    : my $expanded_slice      = $slice->expand( 1000, 1000);
01014                my $contracted_slice    = $slice->expand(-1000,-1000);
01015                my $shifted_right_slice = $slice->expand(-1000, 1000);
01016                my $shifted_left_slice  = $slice->expand( 1000,-1000);
01017                my $forced_contracted_slice    = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
01018 
01019   Description: Returns a slice which is a resized copy of this slice.  The
01020                start and end are moved outwards from the center of the slice
01021                if positive values are provided and moved inwards if negative
01022                values are provided. This slice remains unchanged.  A slice
01023                may not be contracted below 1bp but may grow to be arbitrarily
01024                large.
01025   Returntype : Bio::EnsEMBL::Slice
01026   Exceptions : warning if an attempt is made to contract the slice below 1bp
01027   Caller     : general
01028   Status     : Stable
01029 
01030 =cut
01031 
01032 sub expand {
01033   my $self              = shift;
01034   my $five_prime_shift  = shift || 0;
01035   my $three_prime_shift = shift || 0;
01036   my $force_expand      = shift || 0;
01037   my $fpref             = shift;
01038   my $tpref             = shift;
01039 
01040   if ( $self->{'seq'} ) {
01041     warning(
01042        "Cannot expand a slice which has a manually attached sequence ");
01043     return undef;
01044   }
01045 
01046   my $sshift = $five_prime_shift;
01047   my $eshift = $three_prime_shift;
01048 
01049   if ( $self->{'strand'} != 1 ) {
01050     $eshift = $five_prime_shift;
01051     $sshift = $three_prime_shift;
01052   }
01053 
01054   my $new_start = $self->{'start'} - $sshift;
01055   my $new_end   = $self->{'end'} + $eshift;
01056 
01057   if (( $new_start <= 0 || $new_start > $self->seq_region_length() || $new_end <= 0 || $new_end > $self->seq_region_length() ) && ( $self->is_circular() ) ) {
01058       
01059       if ( $new_start <= 0 ) {
01060         $new_start = $self->seq_region_length() + $new_start;
01061       }
01062       if ( $new_start > $self->seq_region_length() ) {
01063         $new_start -= $self->seq_region_length();
01064       }
01065   
01066       if ( $new_end <= 0 ) {
01067         $new_end = $self->seq_region_length() + $new_end;
01068       }
01069       if ( $new_end > $self->seq_region_length() ) {
01070         $new_end -= $self->seq_region_length();
01071       }      
01072       
01073   }
01074 
01075   if ( $new_start > $new_end  && (not $self->is_circular() ) ) {
01076    
01077       if ($force_expand) {
01078         # Apply max possible shift, if force_expand is set
01079         if ( $sshift < 0 ) {
01080           # if we are contracting the slice from the start - move the
01081           # start just before the end
01082           $new_start = $new_end - 1;
01083           $sshift    = $self->{start} - $new_start;
01084         }
01085 
01086         if ( $new_start > $new_end ) {
01087           # if the slice still has a negative length - try to move the
01088           # end
01089           if ( $eshift < 0 ) {
01090             $new_end = $new_start + 1;
01091             $eshift  = $new_end - $self->{end};
01092           }
01093         }
01094         # return the values by which the primes were actually shifted
01095         $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
01096         $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
01097       }
01098       if ( $new_start > $new_end ) {
01099         throw('Slice start cannot be greater than slice end');
01100       }
01101   }
01102 
01103   #fastest way to copy a slice is to do a shallow hash copy
01104   my %new_slice = %$self;
01105   $new_slice{'start'} = int($new_start);
01106   $new_slice{'end'}   = int($new_end);
01107 
01108   return bless \%new_slice, ref($self);
01109 } ## end sub expand
01110 
01111 
01112 
01113 =head2 sub_Slice
01114 
01115   Arg   1    : int $start
01116   Arg   2    : int $end
01117   Arge [3]   : int $strand
01118   Example    : none
01119   Description: Makes another Slice that covers only part of this slice
01120                If a slice is requested which lies outside of the boundaries
01121                of this function will return undef.  This means that
01122                behaviour will be consistant whether or not the slice is
01123                attached to the database (i.e. if there is attached sequence
01124                to the slice).  Alternatively the expand() method or the
01125                SliceAdaptor::fetch_by_region method can be used instead.
01126   Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
01127   Exceptions : none
01128   Caller     : general
01129   Status     : Stable
01130 
01131 =cut
01132 
01133 sub sub_Slice {
01134   my ( $self, $start, $end, $strand ) = @_;
01135 
01136   if( $start < 1 || $start > $self->{'end'} ) {
01137     # throw( "start argument not valid" );
01138     return undef;
01139   }
01140 
01141   if( $end < $start || $end > $self->{'end'} ) {
01142     # throw( "end argument not valid" )
01143     return undef;
01144   }
01145 
01146   my ( $new_start, $new_end, $new_strand, $new_seq );
01147   if( ! defined $strand ) {
01148     $strand = 1;
01149   }
01150 
01151   if( $self->{'strand'} == 1 ) {
01152     $new_start = $self->{'start'} + $start - 1;
01153     $new_end = $self->{'start'} + $end - 1;
01154     $new_strand = $strand;
01155   } else {
01156     $new_start = $self->{'end'} - $end + 1;;
01157     $new_end = $self->{'end'} - $start + 1;
01158     $new_strand = -$strand;
01159   }
01160 
01161   if( defined $self->{'seq'} ) {
01162     $new_seq = $self->subseq( $start, $end, $strand );
01163   }
01164 
01165   #fastest way to copy a slice is to do a shallow hash copy
01166   my %new_slice = %$self;
01167   $new_slice{'start'} = int($new_start);
01168   $new_slice{'end'}   = int($new_end);
01169   $new_slice{'strand'} = $new_strand;
01170   if( $new_seq ) {
01171     $new_slice{'seq'} = $new_seq;
01172   }
01173 
01174   return bless \%new_slice, ref($self);
01175 }
01176 
01177 
01178 
01179 =head2 seq_region_Slice
01180 
01181   Arg [1]    : none
01182   Example    : $slice = $slice->seq_region_Slice();
01183   Description: Returns a slice which spans the whole seq_region which this slice
01184                is on.  For example if this is a slice which spans a small region
01185                of chromosome X, this method will return a slice which covers the
01186                entire chromosome X. The returned slice will always have strand
01187                of 1 and start of 1.  This method cannot be used if the sequence
01188                of the slice has been set manually.
01189   Returntype : Bio::EnsEMBL::Slice
01190   Exceptions : warning if called when sequence of Slice has been set manually.
01191   Caller     : general
01192   Status     : Stable
01193 
01194 =cut
01195 
01196 sub seq_region_Slice {
01197   my $self = shift;
01198 
01199   if($self->{'seq'}){
01200     warning("Cannot get a seq_region_Slice of a slice which has manually ".
01201             "attached sequence ");
01202     return undef;
01203   }
01204 
01205   # quick shallow copy
01206   my $slice;
01207   %{$slice} = %{$self};
01208   bless $slice, ref($self);
01209 
01210   $slice->{'start'}  = 1;
01211   $slice->{'end'}    = $slice->{'seq_region_length'};
01212   $slice->{'strand'} = 1;
01213 
01214   return $slice;
01215 }
01216 
01217 
01218 =head2 get_seq_region_id
01219 
01220   Arg [1]    : none
01221   Example    : my $seq_region_id = $slice->get_seq_region_id();
01222   Description: Gets the internal identifier of the seq_region that this slice
01223                is on. Note that this function will not work correctly if this
01224                slice does not have an attached adaptor. Also note that it may
01225                be better to go through the SliceAdaptor::get_seq_region_id
01226                method if you are working with multiple databases since is
01227                possible to work with slices from databases with different
01228                internal seq_region identifiers.
01229   Returntype : int or undef if slices does not have attached adaptor
01230   Exceptions : warning if slice is not associated with a SliceAdaptor
01231   Caller     : assembly loading scripts, general
01232   Status     : Stable
01233 
01234 =cut
01235 
01236 sub get_seq_region_id {
01237   my ($self) = @_;
01238 
01239   if($self->adaptor) {
01240     return $self->adaptor->get_seq_region_id($self);
01241   } else {
01242     warning('Cannot retrieve seq_region_id without attached adaptor.');
01243     return undef;
01244   }
01245 }
01246 
01247 
01248 =head2 get_all_Attributes
01249 
01250   Arg [1]    : optional string $attrib_code
01251                The code of the attribute type to retrieve values for.
01252   Example    : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};
01253                @slice_attributes    = @{$slice->get_all_Attributes()};
01254   Description: Gets a list of Attributes of this slice''s seq_region.
01255                Optionally just get Attrubutes for given code.
01256   Returntype : listref Bio::EnsEMBL::Attribute
01257   Exceptions : warning if slice does not have attached adaptor
01258   Caller     : general
01259   Status     : Stable
01260 
01261 =cut
01262 
01263 sub get_all_Attributes {
01264   my $self = shift;
01265   my $attrib_code = shift;
01266 
01267   my $result;
01268   my @results;
01269 
01270   if(!$self->adaptor()) {
01271     warning('Cannot get attributes without an adaptor.');
01272     return [];
01273   }
01274 
01275   my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
01276 
01277   if( defined $attrib_code ) {
01278     @results = grep { uc($_->code()) eq uc($attrib_code) }
01279       @{$attribute_adaptor->fetch_all_by_Slice( $self )};
01280     $result = \@results;
01281   } else {
01282     $result = $attribute_adaptor->fetch_all_by_Slice( $self );
01283   }
01284 
01285   return $result;
01286 }
01287 
01288 
01289 =head2 get_all_PredictionTranscripts
01290 
01291   Arg [1]    : (optional) string $logic_name
01292                The name of the analysis used to generate the prediction
01293                transcripts obtained.
01294   Arg [2]    : (optional) boolean $load_exons
01295                If set to true will force loading of all PredictionExons
01296                immediately rather than loading them on demand later.  This
01297                is faster if there are a large number of PredictionTranscripts
01298                and the exons will be used.
01299   Example    : @transcripts = @{$slice->get_all_PredictionTranscripts};
01300   Description: Retrieves the list of prediction transcripts which overlap
01301                this slice with logic_name $logic_name.  If logic_name is
01302                not defined then all prediction transcripts are retrieved.
01303   Returntype : listref of Bio::EnsEMBL::PredictionTranscript
01304   Exceptions : warning if slice does not have attached adaptor
01305   Caller     : none
01306   Status     : Stable
01307 
01308 =cut
01309 
01310 sub get_all_PredictionTranscripts {
01311    my ($self,$logic_name, $load_exons) = @_;
01312 
01313    if(!$self->adaptor()) {
01314      warning('Cannot get PredictionTranscripts without attached adaptor');
01315      return [];
01316    }
01317    my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
01318    return $pta->fetch_all_by_Slice($self, $logic_name, $load_exons);
01319 }
01320 
01321 
01322 
01323 =head2 get_all_DnaAlignFeatures
01324 
01325   Arg [1]    : (optional) string $logic_name
01326                The name of the analysis performed on the dna align features
01327                to obtain.
01328   Arg [2]    : (optional) float $score
01329                The mimimum score of the features to retrieve
01330   Arg [3]    : (optional) string $dbtype
01331                The name of an attached database to retrieve the features from
01332                instead, e.g. 'otherfeatures'.
01333   Arg [4]    : (optional) float hcoverage
01334                The minimum hcoverage od the featurs to retrieve
01335   Example    : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
01336   Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with
01337                logic name $logic_name and with score above $score.  If
01338                $logic_name is not defined features of all logic names are
01339                retrieved.  If $score is not defined features of all scores are
01340                retrieved.
01341   Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
01342   Exceptions : warning if slice does not have attached adaptor
01343   Caller     : general
01344   Status     : Stable
01345 
01346 =cut
01347 
01348 sub get_all_DnaAlignFeatures {
01349    my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
01350 
01351    if(!$self->adaptor()) {
01352      warning('Cannot get DnaAlignFeatures without attached adaptor');
01353      return [];
01354    }
01355 
01356    my $db;
01357 
01358    if($dbtype) {
01359      $db = $self->adaptor->db->get_db_adaptor($dbtype);
01360      if(!$db) {
01361        warning("Don't have db $dbtype returning empty list\n");
01362        return [];
01363      }
01364    } else {
01365      $db = $self->adaptor->db;
01366    }
01367 
01368    my $dafa = $db->get_DnaAlignFeatureAdaptor();
01369 
01370    if(defined($score) and defined ($hcoverage)){
01371      warning "cannot specify score and hcoverage. Using score only";
01372    }
01373    if(defined($score)){
01374      return $dafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
01375    }
01376    return $dafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
01377 }
01378 
01379 
01380 
01381 =head2 get_all_ProteinAlignFeatures
01382 
01383   Arg [1]    : (optional) string $logic_name
01384                The name of the analysis performed on the protein align features
01385                to obtain.
01386   Arg [2]    : (optional) float $score
01387                The mimimum score of the features to retrieve
01388   Arg [3]    : (optional) string $dbtype
01389                The name of an attached database to retrieve features from
01390                instead.
01391   Arg [4]    : (optional) float hcoverage
01392                The minimum hcoverage od the featurs to retrieve
01393   Example    : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
01394   Description: Retrieves the DnaPepAlignFeatures which overlap this slice with
01395                logic name $logic_name and with score above $score.  If
01396                $logic_name is not defined features of all logic names are
01397                retrieved.  If $score is not defined features of all scores are
01398                retrieved.
01399   Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
01400   Exceptions : warning if slice does not have attached adaptor
01401   Caller     : general
01402   Status     : Stable
01403 
01404 =cut
01405 
01406 sub get_all_ProteinAlignFeatures {
01407   my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
01408 
01409   if(!$self->adaptor()) {
01410     warning('Cannot get ProteinAlignFeatures without attached adaptor');
01411     return [];
01412   }
01413 
01414   my $db;
01415 
01416   if($dbtype) {
01417     $db = $self->adaptor->db->get_db_adaptor($dbtype);
01418     if(!$db) {
01419       warning("Don't have db $dbtype returning empty list\n");
01420       return [];
01421     }
01422   } else {
01423     $db = $self->adaptor->db;
01424   }
01425 
01426   my $pafa = $db->get_ProteinAlignFeatureAdaptor();
01427 
01428   if(defined($score) and defined ($hcoverage)){
01429     warning "cannot specify score and hcoverage. Using score only";
01430   }
01431   if(defined($score)){
01432     return $pafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
01433   }
01434   return $pafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
01435 
01436 }
01437 
01438 
01439 
01440 =head2 get_all_SimilarityFeatures
01441 
01442   Arg [1]    : (optional) string $logic_name
01443                the name of the analysis performed on the features to retrieve
01444   Arg [2]    : (optional) float $score
01445                the lower bound of the score of the features to be retrieved
01446   Example    : @feats = @{$slice->get_all_SimilarityFeatures};
01447   Description: Retrieves all dna_align_features and protein_align_features
01448                with analysis named $logic_name and with score above $score.
01449                It is probably faster to use get_all_ProteinAlignFeatures or
01450                get_all_DnaAlignFeatures if a sepcific feature type is desired.
01451                If $logic_name is not defined features of all logic names are
01452                retrieved.  If $score is not defined features of all scores are
01453                retrieved.
01454   Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
01455   Exceptions : warning if slice does not have attached adaptor
01456   Caller     : general
01457   Status     : Stable
01458 
01459 =cut
01460 
01461 sub get_all_SimilarityFeatures {
01462   my ($self, $logic_name, $score) = @_;
01463 
01464   my @out = ();
01465 
01466   push @out, @{$self->get_all_ProteinAlignFeatures($logic_name, $score) };
01467   push @out, @{$self->get_all_DnaAlignFeatures($logic_name, $score) };
01468 
01469   return \@out;
01470 }
01471 
01472 
01473 
01474 =head2 get_all_SimpleFeatures
01475 
01476   Arg [1]    : (optional) string $logic_name
01477                The name of the analysis performed on the simple features
01478                to obtain.
01479   Arg [2]    : (optional) float $score
01480                The mimimum score of the features to retrieve
01481   Example    : @simple_feats = @{$slice->get_all_SimpleFeatures};
01482   Description: Retrieves the SimpleFeatures which overlap this slice with
01483                logic name $logic_name and with score above $score.  If
01484                $logic_name is not defined features of all logic names are
01485                retrieved.  If $score is not defined features of all scores are
01486                retrieved.
01487   Returntype : listref of Bio::EnsEMBL::SimpleFeatures
01488   Exceptions : warning if slice does not have attached adaptor
01489   Caller     : general
01490   Status     : Stable
01491 
01492 =cut
01493 
01494 sub get_all_SimpleFeatures {
01495   my ($self, $logic_name, $score, $dbtype) = @_;
01496 
01497   if(!$self->adaptor()) {
01498     warning('Cannot get SimpleFeatures without attached adaptor');
01499     return [];
01500   }
01501 
01502   my $db;
01503   if($dbtype) {
01504      $db = $self->adaptor->db->get_db_adaptor($dbtype);
01505      if(!$db) {
01506        warning("Don't have db $dbtype returning empty list\n");
01507        return [];
01508      }
01509   } else {
01510     $db = $self->adaptor->db;
01511   }
01512 
01513   my $sfa = $db->get_SimpleFeatureAdaptor();
01514 
01515   return $sfa->fetch_all_by_Slice_and_score($self, $score, $logic_name);
01516 }
01517 
01518 
01519 
01520 =head2 get_all_RepeatFeatures
01521 
01522   Arg [1]    : (optional) string $logic_name
01523                The name of the analysis performed on the repeat features
01524                to obtain.
01525   Arg [2]    : (optional) string/array $repeat_type
01526                Limits features returned to those of the specified 
01527                repeat_type. Can specify a single value or an array reference
01528                to limit by more than one
01529   Arg [3]    : (optional) string $db
01530                Key for database e.g. core/vega/cdna/....
01531   Example    : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'Type II Transposons')};
01532   Description: Retrieves the RepeatFeatures which overlap  with
01533                logic name $logic_name and with score above $score.  If
01534                $logic_name is not defined features of all logic names are
01535                retrieved.
01536   Returntype : listref of Bio::EnsEMBL::RepeatFeatures
01537   Exceptions : warning if slice does not have attached adaptor
01538   Caller     : general
01539   Status     : Stable
01540 
01541 =cut
01542 
01543 sub get_all_RepeatFeatures {
01544   my ($self, $logic_name, $repeat_type, $dbtype) = @_;
01545 
01546   if(!$self->adaptor()) {
01547     warning('Cannot get RepeatFeatures without attached adaptor');
01548     return [];
01549   }
01550 
01551   my $db;
01552   if($dbtype) {
01553      $db = $self->adaptor->db->get_db_adaptor($dbtype);
01554      if(!$db) {
01555        warning("Don't have db $dbtype returning empty list\n");
01556        return [];
01557      }
01558   } else {
01559     $db = $self->adaptor->db;
01560   }
01561 
01562   my $rpfa = $db->get_RepeatFeatureAdaptor();
01563 
01564   return $rpfa->fetch_all_by_Slice($self, $logic_name, $repeat_type);
01565 }
01566 
01567 =head2 get_all_LD_values
01568 
01569     Arg [1]     : (optional) Bio::EnsEMBL::Variation::Population $population
01570     Description : returns all LD values on this slice. This function will only work correctly if the variation
01571                   database has been attached to the core database. If the argument is passed, will return the LD information
01572                   in that population
01573     ReturnType  : Bio::EnsEMBL::Variation::LDFeatureContainer
01574     Exceptions  : none
01575     Caller      : contigview, snpview
01576      Status     : At Risk
01577                 : Variation database is under development.
01578 
01579 =cut
01580 
01581 sub get_all_LD_values{
01582     my $self = shift;
01583     my $population = shift;
01584 
01585 
01586     if(!$self->adaptor()) {
01587     warning('Cannot get LDFeatureContainer without attached adaptor');
01588     return [];
01589     }
01590 
01591     my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
01592 
01593     unless($variation_db) {
01594     warning("Variation database must be attached to core database to " .
01595         "retrieve variation information" );
01596     return [];
01597     }
01598 
01599     my $ld_adaptor = $variation_db->get_LDFeatureContainerAdaptor;
01600 
01601     if( $ld_adaptor ) {
01602     return $ld_adaptor->fetch_by_Slice($self,$population);
01603     } else {
01604     return [];
01605 
01606     }
01607 
01608 #     my $ld_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(-species => $self->adaptor()->db()->species, -type => "LDFeatureContainer");
01609 
01610 #   if( $ld_adaptor ) {
01611 #       my $ld_values = $ld_adaptor->fetch_by_Slice($self,$population);
01612 #       if (@{$ld_values} > 1){
01613 #     warning("More than 1 variation database attached. Trying to merge LD results");
01614 #     my $ld_value_merged = shift @{$ld_values};
01615 #     #with more than 1 variation database attached, will try to merge in one single LDContainer object.
01616 #     foreach my $ld (@{$ld_values}){
01617 #         #copy the ld values to the result hash
01618 #         foreach my $key (keys %{$ld->{'ldContainer'}}){
01619 #         $ld_value_merged->{'ldContainer'}->{$key} = $ld->{'ldContainer'}->{$key};
01620 #         }
01621 #         #and copy the variationFeatures as well
01622 #         foreach my $key (keys %{$ld->{'variationFeatures'}}){
01623 #         $ld_value_merged->{'variationFeatures'}->{$key} = $ld->{'variationFeatures'}->{$key};
01624 #         }
01625 
01626 #     }
01627 #     return $ld_value_merged;
01628 #       }
01629 #       else{
01630 #     return shift @{$ld_values};
01631 #       }
01632 # } else {
01633 #     warning("Variation database must be attached to core database to " .
01634 #       "retrieve variation information" );
01635 #     return [];
01636 # }
01637 }
01638 
01639 sub _get_VariationFeatureAdaptor {
01640     
01641   my $self = shift;
01642     
01643   if(!$self->adaptor()) {
01644     warning('Cannot get variation features without attached adaptor');
01645     return undef;
01646   }
01647     
01648   my $vf_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
01649     -species  => $self->adaptor()->db()->species, 
01650     -type     => "VariationFeature"
01651   );
01652   
01653   if( $vf_adaptor ) {
01654     return $vf_adaptor;
01655   }
01656   else {
01657     warning("Variation database must be attached to core database to " .
01658             "retrieve variation information" );
01659         
01660     return undef;
01661   }
01662 }
01663 
01664 =head2 get_all_VariationFeatures
01665 
01666     Args        : $filter [optional] (deprecated)
01667     Description : Returns all germline variation features on this slice. This function will 
01668                   only work correctly if the variation database has been attached to the core 
01669                   database.
01670                   If (the deprecated parameter) $filter is "genotyped" return genotyped SNPs only, 
01671                   otherwise return all germline variations.
01672     ReturnType  : listref of Bio::EnsEMBL::Variation::VariationFeature
01673     Exceptions  : none
01674     Caller      : contigview, snpview
01675     Status      : At Risk
01676                 : Variation database is under development.
01677 
01678 =cut
01679 
01680 sub get_all_VariationFeatures{
01681   my $self = shift;
01682   my $filter = shift;
01683 
01684   if ($filter && $filter eq 'genotyped') {
01685     deprecate("Don't pass 'genotyped' parameter, call get_all_genotyped_VariationFeatures instead");
01686     return $self->get_all_genotyped_VariationFeatures;
01687   }
01688     
01689   if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
01690     return $vf_adaptor->fetch_all_by_Slice($self);
01691   }
01692   else {
01693     return [];
01694   }
01695 }
01696 
01697 =head2 get_all_somatic_VariationFeatures
01698 
01699     Args        : $filter [optional]
01700     Description : Returns all somatic variation features on this slice. This function will only 
01701                   work correctly if the variation database has been attached to the core database.
01702     ReturnType  : listref of Bio::EnsEMBL::Variation::VariationFeature
01703     Exceptions  : none
01704     Status      : At Risk
01705                 : Variation database is under development.
01706 
01707 =cut
01708 
01709 sub get_all_somatic_VariationFeatures {
01710   my $self = shift;
01711     
01712   if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
01713     return $vf_adaptor->fetch_all_somatic_by_Slice($self);
01714   }
01715   else{
01716     return [];
01717   }
01718 }
01719 
01720 =head2 get_all_VariationFeatures_with_annotation
01721 
01722     Arg [1]     : $variation_feature_source [optional]
01723     Arg [2]     : $annotation_source [optional]
01724     Arg [3]     : $annotation_name [optional]
01725     Description : returns all germline variation features on this slice associated with a phenotype.
01726                   This function will only work correctly if the variation database has been
01727                   attached to the core database.
01728                   If $variation_feature_source is set only variations from that source
01729                   are retrieved.
01730                   If $annotation_source is set only variations whose annotations come from
01731                   $annotation_source will be retrieved.
01732                   If $annotation_name is set only variations with that annotation will be retrieved.
01733                   $annotation_name can be a phenotype's internal dbID.
01734     ReturnType  : listref of Bio::EnsEMBL::Variation::VariationFeature
01735     Exceptions  : none
01736     Caller      : contigview, snpview
01737     Status      : At Risk
01738                 : Variation database is under development.
01739 
01740 =cut
01741 
01742 sub get_all_VariationFeatures_with_annotation{
01743   my $self = shift;
01744   my $source = shift;
01745   my $p_source = shift;
01746   my $annotation = shift;
01747 
01748   if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
01749     return $vf_adaptor->fetch_all_with_annotation_by_Slice($self, $source, $p_source, $annotation);
01750   }
01751   else {
01752     return [];
01753   }
01754 }
01755 
01756 =head2 get_all_somatic_VariationFeatures_with_annotation
01757 
01758     Arg [1]     : $variation_feature_source [optional]
01759     Arg [2]     : $annotation_source [optional]
01760     Arg [3]     : $annotation_name [optional]
01761     Description : returns all somatic variation features on this slice associated with a phenotype.
01762                   (see get_all_VariationFeatures_with_annotation for further documentation)
01763     ReturnType  : listref of Bio::EnsEMBL::Variation::VariationFeature
01764     Exceptions  : none
01765     Status      : At Risk
01766                 : Variation database is under development.
01767 
01768 =cut
01769 
01770 sub get_all_somatic_VariationFeatures_with_annotation{
01771   my $self = shift;
01772   my $source = shift;
01773   my $p_source = shift;
01774   my $annotation = shift;
01775 
01776   if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
01777     return $vf_adaptor->fetch_all_somatic_with_annotation_by_Slice($self, $source, $p_source, $annotation);
01778   }
01779   else {
01780     return [] unless $vf_adaptor;
01781   }
01782 }
01783 
01784 =head2 get_all_VariationFeatures_by_VariationSet
01785 
01786     Arg [1]     : Bio::EnsEMBL:Variation::VariationSet $set
01787     Description :returns all variation features on this slice associated with a given set.
01788                  This function will only work correctly if the variation database has been
01789                  attached to the core database. 
01790     ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
01791     Exceptions : none
01792     Caller     : contigview, snpview
01793     Status     : At Risk
01794                : Variation database is under development.
01795 
01796 =cut
01797 
01798 sub get_all_VariationFeatures_by_VariationSet {
01799   my $self = shift;
01800   my $set = shift;
01801 
01802   if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
01803     return $vf_adaptor->fetch_all_by_Slice_VariationSet($self, $set);  
01804   }
01805   else {
01806     return [];
01807   }
01808 }
01809 
01810 =head2 get_all_StructuralVariations
01811 
01812         Description: DEPRECATED. Use get_all_StructuralVariationFeatures instead
01813 
01814 =cut
01815 
01816 sub get_all_StructuralVariations{
01817   my $self = shift;
01818   my $source = shift;
01819     my $study = shift;
01820     my $sv_class = shift;
01821     
01822     deprecate('Use get_all_StructuralVariationFeatures() instead.');
01823 
01824     return $self->get_all_StructuralVariationFeatures($source,$sv_class);
01825 }
01826 
01827 
01828 =head2 get_all_CopyNumberVariantProbes
01829 
01830     Description: DEPRECATED. Use get_all_CopyNumberVariantProbeFeatures instead
01831     
01832 =cut
01833 
01834 sub get_all_CopyNumberVariantProbes {
01835     my $self = shift;
01836   my $source = shift;
01837     my $study = shift;
01838     
01839     deprecate('Use get_all_CopyNumberVariantProbeFeatures() instead.');
01840     
01841     return $self->get_all_CopyNumberVariantProbeFeatures($source);
01842 }
01843 
01844 
01845 sub _get_StructuralVariationFeatureAdaptor {
01846     
01847   my $self = shift;
01848     
01849   if(!$self->adaptor()) {
01850     warning('Cannot get structural variation features without attached adaptor');
01851     return undef;
01852   }
01853     
01854   my $svf_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
01855     -species  => $self->adaptor()->db()->species, 
01856     -type     => "StructuralVariationFeature"
01857   );
01858   
01859   if( $svf_adaptor ) {
01860     return $svf_adaptor;
01861   }
01862   else {
01863     warning("Variation database must be attached to core database to " .
01864             "retrieve variation information" );
01865         
01866     return undef;
01867   }
01868 }
01869 
01870 
01871 =head2 get_all_StructuralVariationFeatures
01872 
01873     Arg[1]      : string $source [optional]
01874         Arg[2]      : int $include_evidence [optional]
01875     Description : returns all structural variation features on this slice. This function will only work
01876                   correctly if the variation database has been attached to the core database.
01877                   If $source is set, only structural variation features with that source name will be 
01878                                     returned. By default, it only returns structural variant features which are not labelled 
01879                                     as "CNV_PROBE".
01880                                     If $include_evidence is set (i.e. $include_evidence=1), structural variation features from 
01881                                 both structural variation (SV) and their supporting structural variations (SSV) will be 
01882                                 returned. By default, it only returns features from structural variations (SV). 
01883                                 from structural variations.
01884     ReturnType  : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
01885     Exceptions  : none
01886     Caller      : contigview, snpview, structural_variation_features
01887     Status      : At Risk
01888 
01889 =cut
01890 
01891 sub get_all_StructuralVariationFeatures {
01892   my $self             = shift;
01893   my $source           = shift;
01894     my $include_evidence = shift;
01895     my $sv_class         = shift;
01896     
01897     my $operator = '';
01898     
01899   if (!defined($sv_class)) { 
01900         $sv_class = 'SO:0000051'; # CNV_PROBE
01901         $operator = '!'; # All but CNV_PROBE
01902     }
01903     
01904     my $svf_adaptor = $self->_get_StructuralVariationFeatureAdaptor;
01905     
01906     my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
01907     
01908     # Get the attrib_id
01909     my $at_adaptor = $variation_db->get_AttributeAdaptor;
01910     my $SO_term   = $at_adaptor->SO_term_for_SO_accession($sv_class);
01911     my $attrib_id = $at_adaptor->attrib_id_for_type_value('SO_term',$SO_term);
01912 
01913     if (!$attrib_id) {
01914         warning("The Sequence Ontology accession number is not found in the database");
01915     return [];
01916     }
01917     
01918     # Get the structural variations features
01919   if( $svf_adaptor ) {
01920     
01921         my $constraint = qq{ svf.class_attrib_id $operator=$attrib_id };
01922            $constraint .= qq{ AND svf.is_evidence=0 } if (!$include_evidence);
01923 
01924         if($source) {
01925       return $svf_adaptor->fetch_all_by_Slice_constraint($self, qq{$constraint AND s.name = '$source'});
01926     }else {
01927             return $svf_adaptor->fetch_all_by_Slice_constraint($self, $constraint);
01928     }
01929   }
01930   else {
01931         warning("Variation database must be attached to core database to " .
01932                         "retrieve variation information" );
01933     return [];
01934   }
01935 }
01936 
01937 
01938 =head2 get_all_StructuralVariationFeatures_by_VariationSet
01939 
01940     Arg [1]     : Bio::EnsEMBL:Variation::VariationSet $set
01941     Description :returns all structural variation features on this slice associated with a 
01942                  given set.
01943                  This function will only work correctly if the variation database has been
01944                  attached to the core database. 
01945     ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
01946     Exceptions : none
01947     Caller     : contigview, snpview
01948     Status     : At Risk
01949 
01950 =cut
01951 
01952 sub get_all_StructuralVariationFeatures_by_VariationSet {
01953   my $self = shift;
01954   my $set = shift;
01955 
01956   if (my $svf_adaptor = $self->_get_StructuralVariationFeatureAdaptor) {
01957     return $svf_adaptor->fetch_all_by_Slice_VariationSet($self, $set);  
01958   }
01959   else {
01960     return [];
01961   }
01962 }
01963 
01964 
01965 =head2 get_all_CopyNumberVariantProbeFeatures
01966 
01967     Arg[1]      : string $source [optional]
01968     Description : returns all copy number variant probes on this slice. This function will only work
01969                   correctly if the variation database has been attached to the core database.
01970                   If $source is set, only CNV probes with that source name will be returned.
01971                   If $study is set, only CNV probes of that study will be returned.
01972     ReturnType  : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
01973     Exceptions  : none
01974     Caller      : contigview, snpview, structural_variation_feature
01975     Status      : At Risk
01976 
01977 =cut
01978 
01979 sub get_all_CopyNumberVariantProbeFeatures {
01980     my $self   = shift;
01981   my $source = shift;
01982     
01983     return $self->get_all_StructuralVariationFeatures($source,0,'SO:0000051');
01984 }
01985 
01986 
01987 =head2 get_all_VariationFeatures_by_Population
01988 
01989   Arg [1]    : Bio::EnsEMBL::Variation::Population
01990   Arg [2]    : $minimum_frequency (optional)
01991   Example    : $pop = $pop_adaptor->fetch_by_dbID(659);
01992                @vfs = @{$slice->get_all_VariationFeatures_by_Population(
01993                  $pop,$slice)};
01994   Description: Retrieves all variation features in a slice which are stored for
01995                a specified population. If $minimum_frequency is supplied, only
01996                variations with a minor allele frequency (MAF) greater than
01997                $minimum_frequency will be returned.
01998   Returntype : listref of Bio::EnsEMBL::Variation::VariationFeature
01999   Exceptions : throw on incorrect argument
02000   Caller     : general
02001   Status     : At Risk
02002 
02003 =cut
02004 
02005 sub get_all_VariationFeatures_by_Population {
02006   my $self = shift;
02007 
02008   if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
02009     return $vf_adaptor->fetch_all_by_Slice_Population($self, @_);
02010   }
02011   else {
02012     return [];
02013   }
02014 }
02015 
02016 
02017 
02018 
02019 =head2 get_all_IndividualSlice
02020 
02021     Args        : none
02022     Example     : my $individualSlice = $slice->get_by_Population($population);
02023     Description : Gets the specific Slice for all the individuls in the population
02024     ReturnType  : listref of Bio::EnsEMB::IndividualSlice
02025     Exceptions  : none
02026     Caller      : general
02027 
02028 =cut
02029 
02030 sub get_all_IndividualSlice{
02031     my $self = shift;
02032 
02033     my $individualSliceFactory = Bio::EnsEMBL::IndividualSliceFactory->new(
02034                                        -START   => $self->{'start'},
02035                                        -END     => $self->{'end'},
02036                                        -STRAND  => $self->{'strand'},
02037                                        -ADAPTOR => $self->adaptor(),
02038                                        -SEQ_REGION_NAME => $self->{'seq_region_name'},
02039                                        -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
02040                                        -COORD_SYSTEM    => $self->{'coord_system'},
02041                                        );
02042     return $individualSliceFactory->get_all_IndividualSlice();
02043 }
02044 
02045 =head2 get_by_Individual
02046 
02047     Arg[1]      : Bio::EnsEMBL::Variation::Individual $individual
02048     Example     : my $individualSlice = $slice->get_by_Individual($individual);
02049     Description : Gets the specific Slice for the individual
02050     ReturnType  : Bio::EnsEMB::IndividualSlice
02051     Exceptions  : none
02052     Caller      : general
02053 
02054 =cut
02055 
02056 sub get_by_Individual{
02057     my $self = shift;
02058     my $individual = shift;
02059 
02060     return Bio::EnsEMBL::IndividualSlice->new(
02061                       -START   => $self->{'start'},
02062                       -END     => $self->{'end'},
02063                       -STRAND  => $self->{'strand'},
02064                       -ADAPTOR => $self->adaptor(),
02065 #                     -SEQ     => $self->{'seq'},
02066                       -SEQ_REGION_NAME => $self->{'seq_region_name'},
02067                       -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
02068                       -COORD_SYSTEM    => $self->{'coord_system'},
02069                       -INDIVIDUAL     => $individual);
02070 
02071 }
02072 
02073 
02074 
02075 =head2 get_by_strain
02076 
02077     Arg[1]      : string $strain
02078     Example     : my $strainSlice = $slice->get_by_strain($strain);
02079     Description : Gets the specific Slice for the strain
02080     ReturnType  : Bio::EnsEMB::StrainSlice
02081     Exceptions  : none
02082     Caller      : general
02083 
02084 =cut
02085 
02086 sub get_by_strain{
02087     my $self = shift;
02088     my $strain_name = shift;
02089 
02090     return Bio::EnsEMBL::StrainSlice->new(
02091                       -START   => $self->{'start'},
02092                       -END     => $self->{'end'},
02093                       -STRAND  => $self->{'strand'},
02094                       -ADAPTOR => $self->adaptor(),
02095                       -SEQ     => $self->{'seq'},
02096                       -SEQ_REGION_NAME => $self->{'seq_region_name'},
02097                       -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
02098                       -COORD_SYSTEM    => $self->{'coord_system'},
02099                       -STRAIN_NAME     => $strain_name);
02100 
02101 }
02102 
02103 sub calculate_theta{
02104     my $self = shift;
02105     my $strains = shift;
02106     my $feature = shift; #optional parameter. Name of the feature in the Slice you want to calculate
02107 
02108     if(!$self->adaptor()) {
02109     warning('Cannot get variation features without attached adaptor');
02110     return 0;
02111     }
02112     my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
02113 
02114     unless($variation_db) {
02115     warning("Variation database must be attached to core database to " .
02116         "retrieve variation information" );
02117     return 0;
02118     }
02119 
02120     #need to get coverage regions for the slice in the different strains
02121     my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
02122     my $strain;
02123     my $differences = [];
02124     my $slices = [];
02125     if ($coverage_adaptor){
02126     my $num_strains = scalar(@{$strains}) +1;
02127     if (!defined $feature){
02128         #we want to calculate for the whole slice
02129         push @{$slices}, $self; #add the slice as the slice to calculate the theta value
02130     }
02131     else{
02132         #we have features, get the slices for the different features
02133         my $features = $self->get_all_Exons();
02134         map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features
02135     }
02136     my $length_regions = 0;
02137     my $snps = 0;
02138     my $theta = 0;
02139     my $last_position = 0;
02140     #get all the differences in the slice coordinates
02141     foreach my $strain_name (@{$strains}){
02142         my $strain = $self->get_by_strain($strain_name); #get the strainSlice for the strain
02143 
02144         my $results = $strain->get_all_differences_Slice;
02145         push @{$differences}, @{$results} if (defined $results);
02146     }
02147     #when we finish, we have, in max_level, the regions covered by all the sample
02148     #sort the differences by the genomic position
02149     my @differences_sorted = sort {$a->start <=> $b->start} @{$differences};
02150     foreach my $slice (@{$slices}){
02151         my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains);
02152         if (defined $regions_covered){
02153         foreach my $range (@{$regions_covered}){
02154             $length_regions += ($range->[1] - $range->[0]) + 1; #add the length of the genomic region
02155             for (my $i = $last_position;$i<@differences_sorted;$i++){
02156             if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){
02157                 $snps++; #count differences in the region
02158             }
02159             elsif ($differences_sorted[$i]->end > $range->[1]){
02160                 $last_position = $i;
02161                 last;
02162             }
02163             }
02164         }
02165         #when all the ranges have been iterated, calculate rho
02166         #this is an intermediate variable called a in the formula
02167         #  a = sum i=2..strains 1/i-1
02168         }
02169     }
02170     my $a = _calculate_a($num_strains);
02171     $theta = $snps / ($a * $length_regions);
02172     return $theta;
02173     }
02174     else{
02175     return 0;
02176     }
02177 }
02178 
02179 
02180 
02181 
02182 sub _calculate_a{
02183     my $max_level = shift;
02184 
02185     my $a = 0;
02186     for (my $i = 2; $i <= $max_level+1;$i++){
02187     $a += 1/($i-1);
02188     }
02189     return $a;
02190 }
02191 
02192 sub calculate_pi{
02193     my $self = shift;
02194     my $strains = shift;
02195     my $feature = shift;
02196 
02197     if(!$self->adaptor()) {
02198     warning('Cannot get variation features without attached adaptor');
02199     return 0;
02200     }
02201     my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
02202 
02203     unless($variation_db) {
02204     warning("Variation database must be attached to core database to " .
02205         "retrieve variation information" );
02206     return 0;
02207     }
02208 
02209     #need to get coverage regions for the slice in the different strains
02210     my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
02211     my $differences = [];
02212     my $slices = [];
02213     if ($coverage_adaptor){
02214     my $num_strains = scalar(@{$strains}) +1;
02215     if (!defined $feature){
02216         #we want to calculate for the whole slice
02217         push @{$slices}, $self; #add the slice as the slice to calculate the theta value
02218     }
02219     else{
02220         #we have features, get the slices for the different features
02221         my $features = $self->get_all_Exons();
02222         map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features
02223     }
02224     my @range_differences = ();
02225     my $pi = 0;
02226     my $regions = 0;
02227     my $last_position = 0; #last position visited in the sorted list of differences
02228     my $triallelic = 0;
02229     my $is_triallelic = 0;
02230     foreach my $slice (@{$slices}){
02231         foreach my $strain_name (@{$strains}){
02232         my $strain = $slice->get_by_strain($strain_name); #get the strainSlice for the strain
02233         my $results = $strain->get_all_differences_Slice;
02234         push @{$differences}, @{$results} if (defined $results);
02235         }
02236         my @differences_sorted = sort {$a->start <=> $b->start} @{$differences};
02237 
02238         my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains);
02239         #when we finish, we have, in max_level, the regions covered by all the sample
02240         #sort the differences
02241         if (defined $regions_covered){
02242         foreach my $range (@{$regions_covered}){
02243             for (my $i = $last_position;$i<@differences_sorted;$i++){
02244             if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){
02245                 #check wether it is the same region or different
02246                 if (!defined $range_differences[0] || ($differences_sorted[$i]->start == $range_differences[0]->start)){
02247                 if (defined $range_differences[0] && ($differences_sorted[$i]->allele_string ne $range_differences[0]->allele_string)){
02248                     $is_triallelic = 1;
02249                 }
02250                 push @range_differences, $differences_sorted[$i];
02251                 }
02252                 else{
02253                 #new site, calc pi for the previous one
02254                 $pi += 2 * (@range_differences/($num_strains)) * ( 1 - (@range_differences/$num_strains));
02255                 if ($is_triallelic) {
02256                     $triallelic++;
02257                     $is_triallelic = 0;
02258                 }
02259                 $regions++;
02260                 @range_differences = ();
02261                 #and start a new range
02262                 push @range_differences, $differences_sorted[$i];
02263                 }
02264             }
02265             elsif ($differences_sorted[$i]->end > $range->[1]){
02266                 $last_position = $i;
02267                 last;
02268             }
02269             }
02270             #calculate pi for last site, if any
02271             if (defined $range_differences[0]){
02272             $pi += 2 * (@range_differences/$num_strains) * ( 1 - (@range_differences/$num_strains));
02273             $regions++;
02274             }
02275         }
02276         }
02277         $pi = $pi / $regions; #calculate average pi
02278         print "Regions with variations in region $regions and triallelic $triallelic\n\n";
02279     }
02280     return $pi;
02281     }
02282     else{
02283     return 0;
02284     }
02285 
02286 }
02287 
02288 
02289 
02290 
02291 
02292 =head2 get_all_genotyped_VariationFeatures
02293 
02294     Args       : none
02295     Function   : returns all variation features on this slice that have been genotyped. This function will only work
02296                 correctly if the variation database has been attached to the core database.
02297     ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
02298     Exceptions : none
02299     Caller     : contigview, snpview, ldview
02300     Status     : At Risk
02301                : Variation database is under development.
02302 
02303 =cut
02304 
02305 sub get_all_genotyped_VariationFeatures{
02306   my $self = shift;
02307 
02308   if( my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
02309     return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
02310   } 
02311   else {
02312     return [];
02313   }
02314 }
02315 
02316 
02317 =head2 get_all_SNPs
02318 
02319  Description: DEPRECATED. Use get_all_VariationFeatures insted
02320 
02321 =cut
02322 
02323 sub get_all_SNPs {
02324   my $self = shift;
02325 
02326   deprecate('Use get_all_VariationFeatures() instead.');
02327 
02328   my $snps;
02329   my $vf = $self->get_all_genotyped_VariationFeatures();
02330   if( $vf->[0] ) {
02331       #necessary to convert the VariationFeatures into SNP objects
02332       foreach my $variation_feature (@{$vf}){
02333       push @{$snps},$variation_feature->convert_to_SNP();
02334       }
02335       return $snps;
02336   } else {
02337     return [];
02338   }
02339 }
02340 
02341 =head2 get_all_genotyped_SNPs
02342 
02343   Description   : DEPRECATED. Use get_all_genotyped_VariationFeatures insted
02344 
02345 =cut
02346 
02347 sub get_all_genotyped_SNPs {
02348   my $self = shift;
02349 
02350   deprecate("Use get_all_genotyped_VariationFeatures instead");
02351   my $vf = $self->get_all_genotyped_VariationFeatures;
02352   my $snps;
02353   if ($vf->[0]){
02354       foreach my $variation_feature (@{$vf}){
02355       push @{$snps},$variation_feature->convert_to_SNP();
02356       }
02357       return $snps;
02358   } else {
02359       return [];
02360   }
02361 }
02362 
02363 sub get_all_SNPs_transcripts {
02364   my $self = shift;
02365 
02366   deprecate("DEPRECATED");
02367 
02368   return [];
02369 
02370 }
02371 
02372 
02373 
02374 =head2 get_all_Genes
02375 
02376   Arg [1]    : (optional) string $logic_name
02377                The name of the analysis used to generate the genes to retrieve
02378   Arg [2]    : (optional) string $dbtype
02379                The dbtype of genes to obtain.  This assumes that the db has
02380                been added to the DBAdaptor under this name (using the
02381                DBConnection::add_db_adaptor method).
02382   Arg [3]    : (optional) boolean $load_transcripts
02383                If set to true, transcripts will be loaded immediately rather
02384                than being lazy-loaded on request.  This will result in a
02385                significant speed up if the Transcripts and Exons are going to
02386                be used (but a slow down if they are not).
02387   Arg [4]    : (optional) string $source
02388                The source of the genes to retrieve.
02389   Arg [5]    : (optional) string $biotype
02390                The biotype of the genes to retrieve.
02391   Example    : @genes = @{$slice->get_all_Genes};
02392   Description: Retrieves all genes that overlap this slice.
02393   Returntype : listref of Bio::EnsEMBL::Genes
02394   Exceptions : none
02395   Caller     : none
02396   Status     : Stable
02397 
02398 =cut
02399 
02400 sub get_all_Genes{
02401   my ($self, $logic_name, $dbtype, $load_transcripts, $source, $biotype) = @_;
02402 
02403   if(!$self->adaptor()) {
02404     warning('Cannot get Genes without attached adaptor');
02405     return [];
02406   }
02407 
02408   my $ga;
02409    if($dbtype) {
02410      my $db = $reg->get_db($self->adaptor()->db(), $dbtype);
02411      if(defined($db)){
02412        $ga = $reg->get_adaptor( $db->species(), $db->group(), "Gene" );
02413      }
02414      else{
02415        $ga = $reg->get_adaptor( $self->adaptor()->db()->species(), $dbtype, "Gene" );
02416      }
02417      if(!defined $ga) {
02418        warning( "$dbtype genes not available" );
02419        return [];
02420      }
02421  } else {
02422     $ga =  $self->adaptor->db->get_GeneAdaptor();
02423    }
02424 
02425   return $ga->fetch_all_by_Slice( $self, $logic_name, $load_transcripts, $source, $biotype);
02426 }
02427 
02428 =head2 get_all_Genes_by_type
02429 
02430   Arg [1]    : string $type
02431                The biotype of genes wanted.
02432   Arg [2]    : (optional) string $logic_name
02433   Arg [3]    : (optional) boolean $load_transcripts
02434                If set to true, transcripts will be loaded immediately rather
02435                than being lazy-loaded on request.  This will result in a
02436                significant speed up if the Transcripts and Exons are going to
02437                be used (but a slow down if they are not).
02438   Example    : @genes = @{$slice->get_all_Genes_by_type('protein_coding',
02439                'ensembl')};
02440   Description: Retrieves genes that overlap this slice of biotype $type.
02441                This is primarily used by the genebuilding code when several
02442                biotypes of genes are used.
02443 
02444                The logic name is the analysis of the genes that are retrieved.
02445                If not provided all genes will be retrieved instead.
02446 
02447   Returntype : listref of Bio::EnsEMBL::Genes
02448   Exceptions : none
02449   Caller     : genebuilder, general
02450   Status     : Stable
02451 
02452 =cut
02453 
02454 sub get_all_Genes_by_type{
02455   my ($self, $type, $logic_name, $load_transcripts) = @_;
02456 
02457   if(!$self->adaptor()) {
02458     warning('Cannot get Genes without attached adaptor');
02459     return [];
02460   }
02461 
02462   return $self->get_all_Genes($logic_name, undef, $load_transcripts, undef, $type);
02463 }
02464 
02465 
02466 =head2 get_all_Genes_by_source
02467 
02468   Arg [1]    : string source
02469   Arg [2]    : (optional) boolean $load_transcripts
02470                If set to true, transcripts will be loaded immediately rather
02471                than being lazy-loaded on request.  This will result in a
02472                significant speed up if the Transcripts and Exons are going to
02473                be used (but a slow down if they are not).
02474   Example    : @genes = @{$slice->get_all_Genes_by_source('ensembl')};
02475   Description: Retrieves genes that overlap this slice of source $source.
02476 
02477   Returntype : listref of Bio::EnsEMBL::Genes
02478   Exceptions : none
02479   Caller     : general
02480   Status     : Stable
02481 
02482 =cut
02483 
02484 sub get_all_Genes_by_source {
02485   my ($self, $source, $load_transcripts) = @_;
02486 
02487   if(!$self->adaptor()) {
02488     warning('Cannot get Genes without attached adaptor');
02489     return [];
02490   }
02491 
02492   return  $self->get_all_Genes(undef, undef, $load_transcripts, $source);
02493 }
02494 
02495 =head2 get_all_Transcripts
02496 
02497   Arg [1]    : (optional) boolean $load_exons
02498                If set to true exons will not be lazy-loaded but will instead
02499                be loaded right away.  This is faster if the exons are
02500                actually going to be used right away.
02501   Arg [2]    : (optional) string $logic_name
02502                the logic name of the type of features to obtain
02503   Arg [3]    : (optional) string $db_type
02504   Example    : @transcripts = @{$slice->get_all_Transcripts)_};
02505   Description: Gets all transcripts which overlap this slice.  If you want to
02506                specify a particular analysis or type, then you are better off
02507                using get_all_Genes or get_all_Genes_by_type and iterating
02508                through the transcripts of each gene.
02509   Returntype : reference to a list of Bio::EnsEMBL::Transcripts
02510   Exceptions : none
02511   Caller     : general
02512   Status     : Stable
02513 
02514 =cut
02515 
02516 sub get_all_Transcripts {
02517   my $self = shift;
02518   my $load_exons = shift;
02519   my $logic_name = shift;
02520   my $dbtype     = shift;
02521   if(!$self->adaptor()) {
02522     warning('Cannot get Transcripts without attached adaptor');
02523     return [];
02524   }
02525 
02526 
02527   my $ta;
02528   if($dbtype) {
02529     my $db = $reg->get_db($self->adaptor()->db(), $dbtype);
02530     if(defined($db)){
02531       $ta = $reg->get_adaptor( $db->species(), $db->group(), "Transcript" );
02532     } else{
02533       $ta = $reg->get_adaptor( $self->adaptor()->db()->species(), $dbtype, "Transcript" );
02534     }
02535     if(!defined $ta) {
02536       warning( "$dbtype genes not available" );
02537       return [];
02538     }
02539   } else {
02540     $ta =  $self->adaptor->db->get_TranscriptAdaptor();
02541   }
02542   return $ta->fetch_all_by_Slice($self, $load_exons, $logic_name);
02543 }
02544 
02545 
02546 =head2 get_all_Exons
02547 
02548   Arg [1]    : none
02549   Example    : @exons = @{$slice->get_all_Exons};
02550   Description: Gets all exons which overlap this slice.  Note that these exons
02551                will not be associated with any transcripts, so this may not
02552                be terribly useful.
02553   Returntype : reference to a list of Bio::EnsEMBL::Exons
02554   Exceptions : none
02555   Caller     : general
02556   Status     : Stable
02557 
02558 =cut
02559 
02560 sub get_all_Exons {
02561   my $self = shift;
02562 
02563   if(!$self->adaptor()) {
02564     warning('Cannot get Exons without attached adaptor');
02565     return [];
02566   }
02567 
02568   return $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
02569 }
02570 
02571 
02572 
02573 =head2 get_all_QtlFeatures
02574 
02575   Args       : none
02576   Example    : none
02577   Description: returns overlapping QtlFeatures
02578   Returntype : listref Bio::EnsEMBL::Map::QtlFeature
02579   Exceptions : none
02580   Caller     : general
02581   Status     : Stable
02582 
02583 =cut
02584 
02585 sub get_all_QtlFeatures {
02586   my $self = shift;
02587 
02588   if(!$self->adaptor()) {
02589     warning('Cannot get QtlFeatures without attached adaptor');
02590     return [];
02591   }
02592 
02593   my $qfAdaptor;
02594   if( $self->adaptor()) {
02595     $qfAdaptor = $self->adaptor()->db()->get_QtlFeatureAdaptor();
02596   } else {
02597     return [];
02598   }
02599 
02600   return $qfAdaptor->fetch_all_by_Slice_constraint( $self );
02601 }
02602 
02603 
02604 
02605 
02606 =head2 get_all_KaryotypeBands
02607 
02608   Arg [1]    : none
02609   Example    : @kary_bands = @{$slice->get_all_KaryotypeBands};
02610   Description: Retrieves the karyotype bands which this slice overlaps.
02611   Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
02612   Exceptions : none
02613   Caller     : general, contigview
02614   Status     : Stable
02615 
02616 =cut
02617 
02618 sub get_all_KaryotypeBands {
02619   my ($self) = @_;
02620 
02621   if(!$self->adaptor()) {
02622     warning('Cannot get KaryotypeBands without attached adaptor');
02623     return [];
02624   }
02625 
02626   my $kadp = $self->adaptor->db->get_KaryotypeBandAdaptor();
02627   return $kadp->fetch_all_by_Slice($self);
02628 }
02629 
02630 
02631 
02632 
02633 =head2 get_repeatmasked_seq
02634 
02635   Arg [1]    : listref of strings $logic_names (optional)
02636   Arg [2]    : int $soft_masking_enable (optional)
02637   Arg [3]    : hash reference $not_default_masking_cases (optional, default is {})
02638                The values are 0 or 1 for hard and soft masking respectively
02639                The keys of the hash should be of 2 forms
02640                "repeat_class_" . $repeat_consensus->repeat_class,
02641                 e.g. "repeat_class_SINE/MIR"
02642                "repeat_name_" . $repeat_consensus->name
02643                 e.g. "repeat_name_MIR"
02644                depending on which base you want to apply the not default
02645                masking either the repeat_class or repeat_name. Both can be
02646                specified in the same hash at the same time, but in that case,
02647                repeat_name setting has priority over repeat_class. For example,
02648                you may have hard masking as default, and you may want soft
02649                masking of all repeat_class SINE/MIR, but repeat_name AluSp
02650                (which are also from repeat_class SINE/MIR).
02651                Your hash will be something like {"repeat_class_SINE/MIR" => 1,
02652                                                  "repeat_name_AluSp" => 0}
02653   Example    : $rm_slice = $slice->get_repeatmasked_seq();
02654                $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
02655   Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
02656                masked sequence instead of the regular sequence.
02657                Sequence returned by this new slice will have repeat regions
02658                hardmasked by default (sequence replaced by N) or
02659                or soft-masked when arg[2] = 1 (sequence in lowercase)
02660                Will only work with database connection to get repeat features.
02661   Returntype : Bio::EnsEMBL::RepeatMaskedSlice
02662   Exceptions : none
02663   Caller     : general
02664   Status     : Stable
02665 
02666 =cut
02667 
02668 sub get_repeatmasked_seq {
02669     my ($self,$logic_names,$soft_mask,$not_default_masking_cases) = @_;
02670 
02671     return Bio::EnsEMBL::RepeatMaskedSlice->new
02672       (-START   => $self->{'start'},
02673        -END     => $self->{'end'},
02674        -STRAND  => $self->{'strand'},
02675        -ADAPTOR => $self->adaptor(),
02676        -SEQ     => $self->{'seq'},
02677        -SEQ_REGION_NAME => $self->{'seq_region_name'},
02678        -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
02679        -COORD_SYSTEM    => $self->{'coord_system'},
02680        -REPEAT_MASK     => $logic_names,
02681        -SOFT_MASK       => $soft_mask,
02682        -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases);
02683 }
02684 
02685 
02686 
02687 =head2 _mask_features
02688 
02689   Arg [1]    : reference to a string $dnaref
02690   Arg [2]    : array_ref $repeats
02691                reference to a list Bio::EnsEMBL::RepeatFeature
02692                give the list of coordinates to replace with N or with
02693                lower case
02694   Arg [3]    : int $soft_masking_enable (optional)
02695   Arg [4]    : hash reference $not_default_masking_cases (optional, default is {})
02696                The values are 0 or 1 for hard and soft masking respectively
02697                The keys of the hash should be of 2 forms
02698                "repeat_class_" . $repeat_consensus->repeat_class,
02699                 e.g. "repeat_class_SINE/MIR"
02700                "repeat_name_" . $repeat_consensus->name
02701                 e.g. "repeat_name_MIR"
02702                depending on which base you want to apply the not default masking either
02703                the repeat_class or repeat_name. Both can be specified in the same hash
02704                at the same time, but in that case, repeat_name setting has priority over
02705                repeat_class. For example, you may have hard masking as default, and
02706                you may want soft masking of all repeat_class SINE/MIR,
02707                but repeat_name AluSp (which are also from repeat_class SINE/MIR).
02708                Your hash will be something like {"repeat_class_SINE/MIR" => 1,
02709                                                  "repeat_name_AluSp" => 0}
02710   Example    : none
02711   Description: replaces string positions described in the RepeatFeatures
02712                with Ns (default setting), or with the lower case equivalent
02713                (soft masking).  The reference to a dna string which is passed
02714                is changed in place.
02715   Returntype : none
02716   Exceptions : none
02717   Caller     : seq
02718   Status     : Stable
02719 
02720 =cut
02721 
02722 sub _mask_features {
02723   my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_;
02724 
02725   $soft_mask = 0 unless (defined $soft_mask);
02726   $not_default_masking_cases = {} unless (defined $not_default_masking_cases);
02727 
02728   # explicit CORE::length call, to avoid any confusion with the Slice
02729   # length method
02730   my $dnalen = CORE::length($$dnaref);
02731 
02732  REP:foreach my $old_f (@{$repeats}) {
02733     my $f = $old_f->transfer( $self );
02734     my $start  = $f->start;
02735     my $end    = $f->end;
02736     my $length = ($end - $start) + 1;
02737 
02738     # check if we get repeat completely outside of expected slice range
02739     if ($end < 1 || $start > $dnalen) {
02740       # warning("Unexpected: Repeat completely outside slice coordinates.");
02741       next REP;
02742     }
02743 
02744     # repeat partly outside slice range, so correct
02745     # the repeat start and length to the slice size if needed
02746     if ($start < 1) {
02747       $start = 1;
02748       $length = ($end - $start) + 1;
02749     }
02750 
02751     # repeat partly outside slice range, so correct
02752     # the repeat end and length to the slice size if needed
02753     if ($end > $dnalen) {
02754       $end = $dnalen;
02755       $length = ($end - $start) + 1;
02756     }
02757 
02758     $start--;
02759 
02760     my $padstr;
02761     # if we decide to define masking on the base of the repeat_type, we'll need
02762     # to add the following, and the other commented line few lines below.
02763     # my $rc_type = "repeat_type_" . $f->repeat_consensus->repeat_type;
02764     my $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class;
02765     my $rc_name = "repeat_name_" . $f->repeat_consensus->name;
02766 
02767     my $masking_type;
02768     # $masking_type = $not_default_masking_cases->{$rc_type} if (defined $not_default_masking_cases->{$rc_type});
02769     $masking_type = $not_default_masking_cases->{$rc_class} if (defined $not_default_masking_cases->{$rc_class});
02770     $masking_type = $not_default_masking_cases->{$rc_name} if (defined $not_default_masking_cases->{$rc_name});
02771 
02772     $masking_type = $soft_mask unless (defined $masking_type);
02773 
02774     if ($masking_type) {
02775       $padstr = lc substr ($$dnaref,$start,$length);
02776     } else {
02777       $padstr = 'N' x $length;
02778     }
02779     substr ($$dnaref,$start,$length) = $padstr;
02780   }
02781 }
02782 
02783 
02784 =head2 get_all_SearchFeatures
02785 
02786   Arg [1]    : scalar $ticket_ids
02787   Example    : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
02788   Description: Retreives all search features for stored blast
02789                results for the ticket that overlap this slice
02790   Returntype : listref of Bio::EnsEMBL::SeqFeatures
02791   Exceptions : none
02792   Caller     : general (webby!)
02793   Status     : Stable
02794 
02795 =cut
02796 
02797 sub get_all_SearchFeatures {
02798   my $self = shift;
02799   my $ticket = shift;
02800   local $_;
02801   unless($ticket) {
02802     throw("ticket argument is required");
02803   }
02804 
02805   if(!$self->adaptor()) {
02806     warning("Cannot get SearchFeatures without an attached adaptor");
02807     return [];
02808   }
02809 
02810   my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');
02811 
02812   my $offset = $self->start-1;
02813 
02814   my $features = $sfa ? $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end) : [];
02815 
02816   foreach( @$features ) {
02817     $_->start( $_->start - $offset );
02818     $_->end(   $_->end   - $offset );
02819   };
02820   return $features;
02821 
02822 }
02823 
02824 =head2 get_all_AssemblyExceptionFeatures
02825 
02826   Arg [1]    : string $set (optional)
02827   Example    : $slice->get_all_AssemblyExceptionFeatures();
02828   Description: Retreives all misc features which overlap this slice. If
02829                a set code is provided only features which are members of
02830                the requested set are returned.
02831   Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
02832   Exceptions : none
02833   Caller     : general
02834   Status     : Stable
02835 
02836 =cut
02837 
02838 sub get_all_AssemblyExceptionFeatures {
02839   my $self = shift;
02840   my $misc_set = shift;
02841 
02842   my $adaptor = $self->adaptor();
02843 
02844   if(!$adaptor) {
02845     warning('Cannot retrieve features without attached adaptor.');
02846     return [];
02847   }
02848 
02849   my $aefa = $adaptor->db->get_AssemblyExceptionFeatureAdaptor();
02850 
02851   return $aefa->fetch_all_by_Slice($self);
02852 }
02853 
02854 
02855 
02856 =head2 get_all_MiscFeatures
02857 
02858   Arg [1]    : string $set (optional)
02859   Arg [2]    : string $database (optional)
02860   Example    : $slice->get_all_MiscFeatures('cloneset');
02861   Description: Retreives all misc features which overlap this slice. If
02862                a set code is provided only features which are members of
02863                the requested set are returned.
02864   Returntype : listref of Bio::EnsEMBL::MiscFeatures
02865   Exceptions : none
02866   Caller     : general
02867   Status     : Stable
02868 
02869 =cut
02870 
02871 sub get_all_MiscFeatures {
02872   my $self = shift;
02873   my $misc_set = shift;
02874   my $dbtype = shift;
02875   my $msa;
02876 
02877   my $adaptor = $self->adaptor();
02878   if(!$adaptor) {
02879     warning('Cannot retrieve features without attached adaptor.');
02880     return [];
02881   }
02882 
02883   my $mfa;
02884   if($dbtype) {
02885     my $db = $reg->get_db($adaptor->db(), $dbtype);
02886     if(defined($db)){
02887       $mfa = $reg->get_adaptor( lc($db->species()), $db->group(), "miscfeature" );
02888     } else{
02889       $mfa = $reg->get_adaptor( $adaptor->db()->species(), $dbtype, "miscfeature" );
02890     }
02891     if(!defined $mfa) {
02892       warning( "$dbtype misc features not available" );
02893       return [];
02894     }
02895   } else {
02896     $mfa =  $adaptor->db->get_MiscFeatureAdaptor();
02897   }
02898 
02899   if($misc_set) {
02900     return $mfa->fetch_all_by_Slice_and_set_code($self,$misc_set);
02901   }
02902 
02903   return $mfa->fetch_all_by_Slice($self);
02904 }
02905 
02906 =head2 get_all_MarkerFeatures
02907 
02908   Arg [1]    : (optional) string logic_name
02909                The logic name of the marker features to retrieve
02910   Arg [2]    : (optional) int $priority
02911                Lower (exclusive) priority bound of the markers to retrieve
02912   Arg [3]    : (optional) int $map_weight
02913                Upper (exclusive) priority bound of the markers to retrieve
02914   Example    : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
02915   Description: Retrieves all markers which lie on this slice fulfilling the
02916                specified map_weight and priority parameters (if supplied).
02917   Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
02918   Exceptions : none
02919   Caller     : contigview, general
02920   Status     : Stable
02921 
02922 =cut
02923 
02924 sub get_all_MarkerFeatures {
02925   my ($self, $logic_name, $priority, $map_weight) = @_;
02926 
02927   if(!$self->adaptor()) {
02928     warning('Cannot retrieve MarkerFeatures without attached adaptor.');
02929     return [];
02930   }
02931 
02932   my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor;
02933 
02934   my $feats = $ma->fetch_all_by_Slice_and_priority($self,
02935                           $priority,
02936                           $map_weight,
02937                           $logic_name);
02938   return $feats;
02939 }
02940 
02941 
02942 =head2 get_MarkerFeatures_by_Name
02943 
02944   Arg [1]    : string marker Name
02945                The name (synonym) of the marker feature(s) to retrieve
02946   Example    : my @markers = @{$slice->get_MarkerFeatures_by_Name('z1705')};
02947   Description: Retrieves all markers with this ID
02948   Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
02949   Exceptions : none
02950   Caller     : contigview, general
02951   Status     : Stable
02952 
02953 =cut
02954 
02955 sub get_MarkerFeatures_by_Name {
02956   my ($self, $name) = @_;
02957 
02958   if(!$self->adaptor()) {
02959     warning('Cannot retrieve MarkerFeatures without attached adaptor.');
02960     return [];
02961   }
02962 
02963   my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor;
02964 
02965   my $feats = $ma->fetch_all_by_Slice_and_MarkerName($self, $name);
02966   return $feats;
02967 }
02968 
02969 
02970 =head2 get_all_compara_DnaAlignFeatures
02971 
02972   Arg [1]    : string $qy_species
02973                The name of the species to retrieve similarity features from
02974   Arg [2]    : string $qy_assembly
02975                The name of the assembly to retrieve similarity features from
02976   Arg [3]    : string $type
02977                The type of the alignment to retrieve similarity features from
02978   Arg [4]    : <optional> compara dbadptor to use.
02979   Example    : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus',
02980                                 'MGSC3',
02981                                 'WGA');
02982   Description: Retrieves a list of DNA-DNA Alignments to the species specified
02983                by the $qy_species argument.
02984                The compara database must be attached to the core database
02985                for this call to work correctly.  As well the compara database
02986                must have the core dbadaptors for both this species, and the
02987                query species added to function correctly.
02988   Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
02989   Exceptions : warning if compara database is not available
02990   Caller     : contigview
02991   Status     : Stable
02992 
02993 =cut
02994 
02995 sub get_all_compara_DnaAlignFeatures {
02996   my ($self, $qy_species, $qy_assembly, $alignment_type, $compara_db) = @_;
02997 
02998   if(!$self->adaptor()) {
02999     warning("Cannot retrieve DnaAlignFeatures without attached adaptor");
03000     return [];
03001   }
03002 
03003   unless($qy_species && $alignment_type # && $qy_assembly
03004   ) {
03005     throw("Query species and assembly and alignmemt type arguments are required");
03006   }
03007 
03008   if(!defined($compara_db)){
03009     $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
03010   }
03011   unless($compara_db) {
03012     warning("Compara database must be attached to core database or passed ".
03013         "as an argument to " .
03014         "retrieve compara information");
03015     return [];
03016   }
03017 
03018   my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
03019   return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
03020 }
03021 
03022 =head2 get_all_compara_Syntenies
03023 
03024   Arg [1]    : string $query_species e.g. "Mus_musculus" or "Mus musculus"
03025   Arg [2]    : string $method_link_type, default is "SYNTENY"
03026   Arg [3]    : <optional> compara dbadaptor to use.
03027   Description: gets all the compara syntenyies for a specfic species
03028   Returns    : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
03029   Status     : Stable
03030 
03031 =cut
03032 
03033 sub get_all_compara_Syntenies {
03034   my ($self, $qy_species, $method_link_type, $compara_db) = @_;
03035 
03036   if(!$self->adaptor()) {
03037     warning("Cannot retrieve features without attached adaptor");
03038     return [];
03039   }
03040 
03041   unless($qy_species) {
03042     throw("Query species and assembly arguments are required");
03043   }
03044 
03045   unless (defined $method_link_type) {
03046     $method_link_type = "SYNTENY";
03047   }
03048 
03049   if(!defined($compara_db)){
03050     $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
03051   }
03052   unless($compara_db) {
03053     warning("Compara database must be attached to core database or passed ".
03054         "as an argument to " .
03055         "retrieve compara information");
03056     return [];
03057   }
03058   my $gdba = $compara_db->get_GenomeDBAdaptor();
03059   my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
03060   my $dfa = $compara_db->get_DnaFragAdaptor();
03061   my $sra = $compara_db->get_SyntenyRegionAdaptor();
03062 
03063   my $this_gdb = $gdba->fetch_by_core_DBAdaptor($self->adaptor()->db());
03064   my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
03065   my $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb, $query_gdb]);
03066 
03067   my $cs = $self->coord_system()->name();
03068   my $sr = $self->seq_region_name();
03069   my ($dnafrag) = @{$dfa->fetch_all_by_GenomeDB_region($this_gdb, $cs, $sr)};
03070   return $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $dnafrag, $self->start, $self->end);
03071 }
03072 
03073 =head2 get_all_Haplotypes
03074 
03075   Arg [1]    : (optional) boolean $lite_flag
03076                if true lightweight haplotype objects are used
03077   Example    : @haplotypes = $slice->get_all_Haplotypes;
03078   Description: Retrieves all of the haplotypes on this slice.  Only works
03079                if the haplotype adaptor has been attached to the core adaptor
03080                via $dba->add_db_adaptor('haplotype', $hdba);
03081   Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
03082   Exceptions : warning is Haplotype database is not available
03083   Caller     : contigview, general
03084   Status     : Stable
03085 
03086 =cut
03087 
03088 sub get_all_Haplotypes {
03089   my($self, $lite_flag) = @_;
03090 
03091   if(!$self->adaptor()) {
03092     warning("Cannot retrieve features without attached adaptor");
03093     return [];
03094   }
03095 
03096   my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype');
03097 
03098   unless($haplo_db) {
03099     warning("Haplotype database must be attached to core database to " .
03100         "retrieve haplotype information" );
03101     return [];
03102   }
03103 
03104   my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
03105 
03106   my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
03107 
03108   return $haplotypes;
03109 }
03110 
03111 
03112 sub get_all_DASFactories {
03113    my $self = shift;
03114    return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
03115 }
03116 
03117 sub get_all_DASFeatures_dsn {
03118    my ($self, $source_type, $dsn) = @_;
03119 
03120   if(!$self->adaptor()) {
03121     warning("Cannot retrieve features without attached adaptor");
03122     return [];
03123   }
03124   my @X = grep { $_->adaptor->dsn eq $dsn } $self->adaptor()->db()->_each_DASFeatureFactory;
03125 
03126   return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
03127 }
03128 
03129 =head2 get_all_DAS_Features
03130 
03131   Arg [1]    : none
03132   Example    : $features = $slice->get_all_DASFeatures;
03133   Description: Retreives a hash reference to a hash of DAS feature
03134                sets, keyed by the DNS, NOTE the values of this hash
03135                are an anonymous array containing:
03136                 (1) a pointer to an array of features;
03137                 (2) a pointer to the DAS stylesheet
03138   Returntype : hashref of Bio::SeqFeatures
03139   Exceptions : ?
03140   Caller     : webcode
03141   Status     : Stable
03142 
03143 =cut
03144 sub get_all_DAS_Features{
03145   my ($self) = @_;
03146 
03147   $self->{_das_features} ||= {}; # Cache
03148   $self->{_das_styles} ||= {}; # Cache
03149   $self->{_das_segments} ||= {}; # Cache
03150   my %das_features;
03151   my %das_styles;
03152   my %das_segments;
03153   my $slice = $self;
03154 
03155   foreach my $dasfact( @{$self->get_all_DASFactories} ){
03156     my $dsn = $dasfact->adaptor->dsn;
03157     my $name = $dasfact->adaptor->name;
03158 #    my $type = $dasfact->adaptor->type;
03159     my $url = $dasfact->adaptor->url;
03160 
03161  my ($type) = $dasfact->adaptor->mapping;
03162  if (ref $type eq 'ARRAY') {
03163    $type = shift @$type;
03164  }
03165  $type ||= $dasfact->adaptor->type;
03166     # Construct a cache key : SOURCE_URL/TYPE
03167     # Need the type to handle sources that serve multiple types of features
03168 
03169     my $key = join('/', $name, $type);
03170     if( $self->{_das_features}->{$key} ){ # Use cached
03171         $das_features{$name} = $self->{_das_features}->{$key};
03172         $das_styles{$name} = $self->{_das_styles}->{$key};
03173         $das_segments{$name} = $self->{_das_segments}->{$key};
03174     } else { # Get fresh data
03175         my ($featref, $styleref, $segref) = $dasfact->fetch_all_Features( $slice, $type );
03176         $self->{_das_features}->{$key} = $featref;
03177         $self->{_das_styles}->{$key} = $styleref;
03178         $self->{_das_segments}->{$key} = $segref;
03179         $das_features{$name} = $featref;
03180         $das_styles{$name} = $styleref;
03181         $das_segments{$name} = $segref;
03182     }
03183   }
03184 
03185   return (\%das_features, \%das_styles, \%das_segments);
03186 }
03187 
03188 sub get_all_DASFeatures{
03189    my ($self, $source_type) = @_;
03190 
03191 
03192   if(!$self->adaptor()) {
03193     warning("Cannot retrieve features without attached adaptor");
03194     return [];
03195   }
03196 
03197   my %genomic_features = map { ( $_->adaptor->dsn => [ $_->fetch_all_Features($self, $source_type) ]  ) } $self->adaptor()->db()->_each_DASFeatureFactory;
03198   return \%genomic_features;
03199 
03200 }
03201 
03202 sub old_get_all_DASFeatures{
03203    my ($self,@args) = @_;
03204 
03205   if(!$self->adaptor()) {
03206     warning("Cannot retrieve features without attached adaptor");
03207     return [];
03208   }
03209 
03210   my %genomic_features =
03211       map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ]  ) }
03212          $self->adaptor()->db()->_each_DASFeatureFactory;
03213   return \%genomic_features;
03214 
03215 }
03216 
03217 
03218 =head2 get_all_ExternalFeatures
03219 
03220   Arg [1]    : (optional) string $track_name
03221                If specified only features from ExternalFeatureAdaptors with
03222                the track name $track_name are retrieved.
03223                If not set, all features from every ExternalFeatureAdaptor are
03224                retrieved.
03225   Example    : @x_features = @{$slice->get_all_ExternalFeatures}
03226   Description: Retrieves features on this slice from external feature adaptors
03227   Returntype : listref of Bio::SeqFeatureI implementing objects in slice
03228                coordinates
03229   Exceptions : none
03230   Caller     : general
03231   Status     : Stable
03232 
03233 =cut
03234 
03235 sub get_all_ExternalFeatures {
03236    my ($self, $track_name) = @_;
03237 
03238    if(!$self->adaptor()) {
03239      warning("Cannot retrieve features without attached adaptor");
03240      return [];
03241    }
03242 
03243    my $features = [];
03244 
03245    my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
03246    my @xf_adaptors = ();
03247 
03248    if($track_name) {
03249      #use a specific adaptor
03250      if(exists $xfa_hash->{$track_name}) {
03251        push @xf_adaptors, $xfa_hash->{$track_name};
03252      }
03253    } else {
03254      #use all of the adaptors
03255      push @xf_adaptors, values %$xfa_hash;
03256    }
03257 
03258 
03259    foreach my $xfa (@xf_adaptors) {
03260      push @$features, @{$xfa->fetch_all_by_Slice($self)};
03261    }
03262 
03263    return $features;
03264 }
03265 
03266 
03267 =head2 get_all_DitagFeatures
03268 
03269   Arg [1]    : (optional) string ditag type
03270   Arg [1]    : (optional) string logic_name
03271   Example    : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
03272   Description: Retrieves the DitagFeatures of a specific type which overlap
03273                this slice with. If type is not defined, all features are
03274                retrieved.
03275   Returntype : listref of Bio::EnsEMBL::DitagFeatures
03276   Exceptions : warning if slice does not have attached adaptor
03277   Caller     : general
03278   Status     : Stable
03279 
03280 =cut
03281 
03282 sub get_all_DitagFeatures {
03283    my ($self, $type, $logic_name) = @_;
03284 
03285    if(!$self->adaptor()) {
03286      warning('Cannot get DitagFeatures without attached adaptor');
03287      return [];
03288    }
03289 
03290    my $dfa = $self->adaptor->db->get_DitagFeatureAdaptor();
03291 
03292    return $dfa->fetch_all_by_Slice($self, $type, $logic_name);
03293 }
03294 
03295 
03296 
03297 
03298 # GENERIC FEATURES (See DBAdaptor.pm)
03299 
03300 =head2 get_generic_features
03301 
03302   Arg [1]    : (optional) List of names of generic feature types to return.
03303                If no feature names are given, all generic features are
03304                returned.
03305   Example    : my %features = %{$slice->get_generic_features()};
03306   Description: Gets generic features via the generic feature adaptors that
03307                have been added via DBAdaptor->add_GenricFeatureAdaptor (if
03308                any)
03309   Returntype : Hash of named features.
03310   Exceptions : none
03311   Caller     : none
03312   Status     : Stable
03313 
03314 =cut
03315 
03316 sub get_generic_features {
03317 
03318   my ($self, @names) = @_;
03319 
03320   if(!$self->adaptor()) {
03321     warning('Cannot retrieve features without attached adaptor');
03322     return [];
03323   }
03324 
03325   my $db = $self->adaptor()->db();
03326 
03327   my %features = ();   # this will hold the results
03328 
03329   # get the adaptors for each feature
03330   my %adaptors = %{$db->get_GenericFeatureAdaptors(@names)};
03331 
03332   foreach my $adaptor_name (keys(%adaptors)) {
03333 
03334     my $adaptor_obj = $adaptors{$adaptor_name};
03335     # get the features and add them to the hash
03336     my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
03337     # add each feature to the hash to be returned
03338     foreach my $feature (@$features_ref) {
03339       $features{$adaptor_name} = $feature;
03340     }
03341   }
03342 
03343   return \%features;
03344 
03345 }
03346 
03347 =head2 project_to_slice
03348 
03349   Arg [1]    : Slice to project to.
03350   Example    : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
03351                 foreach my $segment ( @$chr_projection ){
03352                   $chr_slice = $segment->to_Slice();
03353                   print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'.
03354                         $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start().
03355                     '-'.$chr_slice->end().
03356                          $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n";
03357                 }
03358   Description: Projection of slice to another specific slice. Needed for where we have multiple mappings
03359                and we want to state which one to project to.
03360   Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
03361                can also be used as [$start,$end,$slice] triplets.
03362   Exceptions : none
03363   Caller     : none
03364   Status     : At Risk
03365 
03366 =cut
03367 
03368 sub project_to_slice {
03369   my $self = shift;
03370   my $to_slice = shift;
03371 
03372   throw('Slice argument is required') if(!$to_slice);
03373 
03374   my $slice_adaptor = $self->adaptor();
03375 
03376   if(!$slice_adaptor) {
03377     warning("Cannot project without attached adaptor.");
03378     return [];
03379   }
03380 
03381 
03382   my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
03383 
03384   my $cs = $to_slice->coord_system();
03385   my $slice_cs = $self->coord_system();
03386   my $to_slice_id = $to_slice->get_seq_region_id;
03387 
03388   my @projection;
03389   my $current_start = 1;
03390 
03391   # decompose this slice into its symlinked components.
03392   # this allows us to handle haplotypes and PARs
03393   my $normal_slice_proj =
03394     $slice_adaptor->fetch_normalized_slice_projection($self);
03395   foreach my $segment (@$normal_slice_proj) {
03396     my $normal_slice = $segment->[2];
03397 
03398     $slice_cs = $normal_slice->coord_system();
03399 
03400     my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
03401     my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
03402 
03403     # perform the mapping between this slice and the requested system
03404     my @coords;
03405 
03406     if( defined $asm_mapper ) {
03407      @coords = $asm_mapper->map($normal_slice->seq_region_name(),
03408                  $normal_slice->start(),
03409                  $normal_slice->end(),
03410                  $normal_slice->strand(),
03411                  $slice_cs, undef, $to_slice);
03412     } else {
03413       $coords[0] = Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
03414                            $normal_slice->end());
03415     }
03416 
03417     my $last_rank =0;
03418     #construct a projection from the mapping results and return it
03419     foreach my $coord (@coords) {
03420       my $coord_start  = $coord->start();
03421       my $coord_end    = $coord->end();
03422       my $length       = $coord_end - $coord_start + 1;
03423 
03424 
03425       if( $last_rank != $coord->rank){
03426     $current_start = 1;
03427       }
03428       $last_rank = $coord->rank;
03429 
03430       #skip gaps
03431       if($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
03432     if($coord->id != $to_slice_id){ # for multiple mappings only get the correct one
03433       $current_start += $length;
03434       next;
03435     }
03436         my $coord_cs     = $coord->coord_system();
03437 
03438         # If the normalised projection just ended up mapping to the
03439         # same coordinate system we were already in then we should just
03440         # return the original region.  This can happen for example, if we
03441         # were on a PAR region on Y which refered to X and a projection to
03442         # 'toplevel' was requested.
03443 #        if($coord_cs->equals($slice_cs)) {
03444 #          # trim off regions which are not defined
03445 #          return $self->_constrain_to_region();
03446 #        }
03447 
03448         #create slices for the mapped-to coord system
03449         my $slice = $slice_adaptor->fetch_by_seq_region_id(
03450                                                     $coord->id(),
03451                                                     $coord_start,
03452                                                     $coord_end,
03453                                                     $coord->strand());
03454 
03455     my $current_end = $current_start + $length - 1;
03456 
03457         push @projection, bless([$current_start, $current_end, $slice],
03458                                 "Bio::EnsEMBL::ProjectionSegment");
03459       }
03460 
03461       $current_start += $length;
03462     }
03463   }
03464 
03465 
03466   # delete the cache as we may want to map to different set next time and old
03467   # results will be cached.
03468 
03469   $mapper_aptr->delete_cache;
03470 
03471   return \@projection;
03472 }
03473 
03474 
03475 =head2 get_all_synonyms
03476 
03477   Args       : none.
03478   Example    : my @alternative_names = @{$slice->get_all_synonyms()};
03479   Description: get a list of alternative names for this slice
03480   Returntype : reference to list of SeqRegionSynonym objects.
03481   Exception  : none
03482   Caller     : general
03483   Status     : At Risk
03484 
03485 =cut
03486 
03487 sub get_all_synonyms{
03488   my $self = shift;
03489   my $external_db_id =shift;
03490 
03491   if ( !defined( $self->{'synonym'} ) ) {
03492     my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
03493     $self->{'synonym'} =
03494       $adap->get_synonyms( $self->get_seq_region_id($self) );
03495   }
03496 
03497   return $self->{'synonym'};
03498 }
03499 
03500 =head2 add_synonym
03501 
03502   Args[0]    : synonym.
03503   Example    : $slice->add_synonym("alt_name");
03504   Description: add an alternative name for this slice
03505   Returntype : none
03506   Exception  : none
03507   Caller     : general
03508   Status     : At Risk
03509 
03510 =cut
03511 
03512 sub add_synonym{
03513   my $self = shift;
03514   my $syn = shift;
03515   my $external_db_id = shift;
03516   
03517   my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
03518   if ( !defined( $self->{'synonym'} ) ) {
03519     $self->{'synonym'} = $self->get_all_synonyms();
03520   }
03521   my $new_syn = Bio::EnsEMBL::SeqRegionSynonym->new( #-adaptor => $adap,
03522                                                      -synonym => $syn,
03523                                                      -external_db_id => $external_db_id, 
03524                                                      -seq_region_id => $self->get_seq_region_id($self));
03525 
03526   print "ADDED new syn $syn to ".$new_syn->seq_region_id."\n";
03527 
03528   push (@{$self->{'synonym'}}, $new_syn);
03529 
03530   return;
03531 }
03532 
03533 =head2 summary_as_hash
03534 
03535   Example       : $slice_summary = $slice->summary_as_hash();
03536   Description   : Retrieves a textual summary of this slice.
03537   Returns       : hashref of descriptive strings
03538 =cut
03539 
03540 sub summary_as_hash {
03541   my $self = shift;
03542   my %summary;
03543   $summary{'display_id'} = $self->display_id;
03544   $summary{'start'} = $self->start;
03545   $summary{'end'} = $self->end;
03546   $summary{'strand'} = $self->strand;
03547   $summary{'Is_circular'} = $self->is_circular ? "true" : "false";
03548   $summary{'region_name'} = $self->seq_region_name();
03549   return \%summary;
03550 }
03551 
03552 #
03553 # Bioperl Bio::PrimarySeqI methods:
03554 #
03555 
03556 =head2 id
03557 
03558   Description: Included for Bio::PrimarySeqI interface compliance (0.7)
03559 
03560 =cut
03561 
03562 sub id { name(@_); }
03563 
03564 
03565 =head2 display_id
03566 
03567   Description: Included for Bio::PrimarySeqI interface compliance (1.2)
03568 
03569 =cut
03570 
03571 sub display_id { name(@_); }
03572 
03573 
03574 =head2 primary_id
03575 
03576   Description: Included for Bio::PrimarySeqI interface compliance (1.2)
03577 
03578 =cut
03579 
03580 sub primary_id { name(@_); }
03581 
03582 
03583 =head2 desc
03584 
03585 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
03586 
03587 =cut
03588 
03589 sub desc{ return $_[0]->coord_system->name().' '.$_[0]->seq_region_name(); }
03590 
03591 
03592 =head2 moltype
03593 
03594 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
03595 
03596 =cut
03597 
03598 sub moltype { return 'dna'; }
03599 
03600 =head2 alphabet
03601 
03602   Description: Included for Bio::PrimarySeqI interface compliance (1.2)
03603 
03604 =cut
03605 
03606 sub alphabet { return 'dna'; }
03607 
03608 
03609 =head2 accession_number
03610 
03611   Description: Included for Bio::PrimarySeqI interface compliance (1.2)
03612 
03613 =cut
03614 
03615 sub accession_number { name(@_); }
03616 
03617 
03618 # sub DEPRECATED METHODS #
03619 ###############################################################################
03620 
03621 =head1 DEPRECATED METHODS
03622 
03623 =head2 get_all_AffyFeatures
03624 
03625   Description:  DEPRECATED, use functionality provided by the Ensembl
03626                 Functional Genomics API instead.
03627 
03628 =cut
03629 
03630 sub get_all_AffyFeatures {
03631     deprecate( 'Use functionality provided by the '
03632         . 'Ensembl Functional Genomics API instead.' );
03633     throw('Can not delegate deprecated functionality.');
03634 
03635     # Old code:
03636 
03637 #    my $self = shift;
03638 #    my @arraynames = @_;
03639 #
03640 #    my $sa = $self->adaptor();
03641 #    if ( ! $sa ) {
03642 #        warning( "Cannot retrieve features without attached adaptor." );
03643 #    }
03644 #    my $fa = $sa->db()->get_AffyFeatureAdaptor();
03645 #    my $features;
03646 #
03647 #    if ( @arraynames ) {
03648 #        $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
03649 #    } else {
03650 #        $features = $fa->fetch_all_by_Slice( $self );
03651 #    }
03652 #    return $features;
03653 }
03654 
03655 =head2 get_all_OligoFeatures
03656 
03657   Description:  DEPRECATED, use functionality provided by the Ensembl
03658                 Functional Genomics API instead.
03659 
03660 =cut
03661 
03662 sub get_all_OligoFeatures {
03663 
03664     deprecate( 'Use functionality provided by the '
03665         . 'Ensembl Functional Genomics API instead.' );
03666     throw('Can not delegate deprecated functionality.');
03667 
03668     # Old code:
03669 
03670 #    my $self = shift;
03671 #    my @arraynames = @_;
03672 #
03673 #    my $sa = $self->adaptor();
03674 #    if ( ! $sa ) {
03675 #        warning( "Cannot retrieve features without attached adaptor." );
03676 #    }
03677 #    my $fa = $sa->db()->get_OligoFeatureAdaptor();
03678 #    my $features;
03679 #
03680 #    if ( @arraynames ) {
03681 #        $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
03682 #    } else {
03683 #        $features = $fa->fetch_all_by_Slice( $self );
03684 #    }
03685 #    return $features;
03686 }
03687 
03688 =head2 get_all_OligoFeatures_by_type
03689 
03690   Description:  DEPRECATED, use functionality provided by the Ensembl
03691                 Functional Genomics API instead.
03692 
03693 =cut
03694 
03695 sub get_all_OligoFeatures_by_type {
03696 
03697     deprecate( 'Use functionality provided by the '
03698         . 'Ensembl Functional Genomics API instead.' );
03699     throw('Can not delegate deprecated functionality.');
03700 
03701     # Old code:
03702 
03703 #    my ($self, $type, $logic_name) = @_;
03704 #
03705 #    throw('Need type as parameter') if !$type;
03706 #
03707 #    my $sa = $self->adaptor();
03708 #    if ( ! $sa ) {
03709 #        warning( "Cannot retrieve features without attached adaptor." );
03710 #    }
03711 #    my $fa = $sa->db()->get_OligoFeatureAdaptor();
03712 #
03713 #    my $features = $fa->fetch_all_by_Slice_type( $self, $type, $logic_name );
03714 #
03715 #    return $features;
03716 }
03717 
03718 =head2 get_all_supercontig_Slices
03719 
03720   Description: DEPRECATED use get_tiling_path("NTcontig") instead
03721 
03722 =cut
03723 
03724 
03725 sub get_all_supercontig_Slices {
03726   my $self = shift;
03727 
03728   deprecate("Use get_tiling_path('NTcontig') instead");
03729 
03730   my $result = [];
03731 
03732   if( $self->adaptor() ) {
03733     my $superctg_names =
03734       $self->adaptor()->list_overlapping_supercontigs( $self );
03735 
03736     for my $name ( @$superctg_names ) {
03737       my $slice;
03738       $slice = $self->adaptor()->fetch_by_supercontig_name( $name );
03739       $slice->name( $name );
03740       push( @$result, $slice );
03741     }
03742   } else {
03743     warning( "Slice needs to be attached to a database to get supercontigs" );
03744   }
03745 
03746   return $result;
03747 }
03748 
03749 
03750 
03751 
03752 
03753 =head2 get_Chromosome
03754 
03755   Description: DEPRECATED use this instead:
03756                $slice_adp->fetch_by_region('chromosome',
03757                                            $slice->seq_region_name)
03758 
03759 =cut
03760 
03761 sub get_Chromosome {
03762   my $self = shift @_;
03763 
03764   deprecate("Use SliceAdaptor::fetch_by_region('chromosome'," .
03765             '$slice->seq_region_name) instead');
03766 
03767   my $csa = $self->adaptor->db->get_CoordSystemAdaptor();
03768   my ($top_cs) = @{$csa->fetch_all()};
03769 
03770   return $self->adaptor->fetch_by_region($top_cs->name(),
03771                                          $self->seq_region_name(),
03772                                          undef,undef,undef,
03773                                          $top_cs->version());
03774 }
03775 
03776 
03777 
03778 =head2 chr_name
03779 
03780   Description: DEPRECATED use seq_region_name() instead
03781 
03782 =cut
03783 
03784 sub chr_name{
03785   deprecate("Use seq_region_name() instead");
03786   seq_region_name(@_);
03787 }
03788 
03789 
03790 
03791 =head2 chr_start
03792 
03793   Description: DEPRECATED use start() instead
03794 
03795 =cut
03796 
03797 sub chr_start{
03798   deprecate('Use start() instead');
03799   start(@_);
03800 }
03801 
03802 
03803 
03804 =head2 chr_end
03805 
03806   Description: DEPRECATED use end() instead
03807   Returntype : int
03808   Exceptions : none
03809   Caller     : SliceAdaptor, general
03810 
03811 =cut
03812 
03813 sub chr_end{
03814   deprecate('Use end() instead');
03815   end(@_);
03816 }
03817 
03818 
03819 =head2 assembly_type
03820 
03821   Description: DEPRECATED use version instead
03822 
03823 =cut
03824 
03825 sub assembly_type{
03826   my $self = shift;
03827   deprecate('Use $slice->coord_system()->version() instead.');
03828   return $self->coord_system->version();
03829 }
03830 
03831 
03832 =head2 get_tiling_path
03833 
03834   Description: DEPRECATED use project instead
03835 
03836 =cut
03837 
03838 sub get_tiling_path {
03839   my $self = shift;
03840   deprecate('Use $slice->project("seqlevel") instead.');
03841   return [];
03842 }
03843 
03844 
03845 =head2 dbID
03846 
03847   Description: DEPRECATED use SliceAdaptor::get_seq_region_id instead
03848 
03849 =cut
03850 
03851 sub dbID {
03852   my $self = shift;
03853   deprecate('Use SliceAdaptor::get_seq_region_id instead.');
03854   if(!$self->adaptor) {
03855     warning('Cannot retrieve seq_region_id without attached adaptor.');
03856     return 0;
03857   }
03858   return $self->adaptor->get_seq_region_id($self);
03859 }
03860 
03861 
03862 =head2 get_all_MapFrags
03863 
03864   Description: DEPRECATED use get_all_MiscFeatures instead
03865 
03866 =cut
03867 
03868 sub get_all_MapFrags {
03869   my $self = shift;
03870   deprecate('Use get_all_MiscFeatures instead');
03871   return $self->get_all_MiscFeatures(@_);
03872 }
03873 
03874 =head2 has_MapSet
03875 
03876   Description: DEPRECATED use get_all_MiscFeatures instead
03877 
03878 =cut
03879 
03880 sub has_MapSet {
03881   my( $self, $mapset_name ) = @_;
03882   deprecate('Use get_all_MiscFeatures instead');
03883   my $mfs = $self->get_all_MiscFeatures($mapset_name);
03884   return (@$mfs > 0);
03885 }
03886 
03887 1;