Archive Ensembl HomeArchive Ensembl Home
StrainSlice.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::StrainSlice - SubClass of the Slice. Represents the slice
00024 of the genome for a certain strain (applying the variations)
00025 
00026 =head1 SYNOPSIS
00027 
00028   $sa = $db->get_SliceAdaptor;
00029 
00030   $slice =
00031     $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
00032 
00033   $strainSlice = $slice->get_by_strain($strain_name);
00034 
00035   # get the sequence from the Strain Slice
00036   my $seq = $strainSlice->seq();
00037   print $seq;
00038 
00039   # get allele features between this StrainSlice and the reference
00040   my $afs = $strainSlice->get_all_AlleleFeatures_Slice();
00041   foreach my $af ( @{$afs} ) {
00042     print "AlleleFeature in position ", $af->start, "-", $af->end,
00043       " in strain with allele ", $af->allele_string, "\n";
00044   }
00045 
00046   # compare a strain against another strain
00047   my $strainSlice_2 = $slice->get_by_strain($strain_name_2);
00048   my $differences =
00049     $strainSlice->get_all_differences_StrainSlice($strainSlice_2);
00050 
00051   foreach my $difference ( @{$differences} ) {
00052     print "Difference in position ", $difference->start, "-",
00053       $difference->end(),           " in strain with allele ",
00054       $difference->allele_string(), "\n";
00055   }
00056 
00057 =head1 DESCRIPTION
00058 
00059 A StrainSlice object represents a region of a genome for a certain
00060 strain.  It can be used to retrieve sequence or features from a strain.
00061 
00062 =head1 METHODS
00063 
00064 =cut
00065 
00066 package Bio::EnsEMBL::StrainSlice;
00067 use vars qw(@ISA);
00068 use strict;
00069 
00070 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00071 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
00072 use Bio::EnsEMBL::Slice;
00073 use Bio::EnsEMBL::Mapper;
00074 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
00075 
00076 @ISA = qw(Bio::EnsEMBL::Slice);
00077 
00078 
00079 =head2 new
00080 
00081     Arg[1]      : Bio::EnsEMBL::Slice $slice
00082     Arg[2]      : string $strain_name
00083     Example     : $strainSlice = Bio::EnsEMBL::StrainSlice->new(-.... => ,
00084                                   -strain_name => $strain_name);
00085     Description : Creates a new Bio::EnsEMBL::StrainSlice object that will contain a shallow copy of the
00086                   Slice object, plus additional information such as the Strain this Slice refers to
00087                   and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the
00088                   reference sequence
00089     ReturnType  : Bio::EnsEMBL::StrainSlice
00090     Exceptions  : none
00091     Caller      : general
00092 
00093 =cut
00094 
00095 sub new{
00096     my $caller = shift;
00097     my $class = ref($caller) || $caller;
00098 
00099     my ($strain_name) = rearrange(['STRAIN_NAME'],@_);
00100 
00101     my $self = $class->SUPER::new(@_);
00102 
00103     $self->{'strain_name'} = $strain_name;
00104 
00105     if(!$self->adaptor()) {
00106     warning('Cannot get new StrainSlice features without attached adaptor');
00107     return '';
00108     }
00109     my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
00110 
00111     unless($variation_db) {
00112     warning("Variation database must be attached to core database to " .
00113         "retrieve variation information" );
00114     return '';
00115     }
00116     
00117     my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
00118     
00119     if( $af_adaptor ) {
00120     #get the Individual for the given strain
00121     my $ind_adaptor = $variation_db->get_IndividualAdaptor;
00122 
00123     if ($ind_adaptor){
00124         my $individual = shift @{$ind_adaptor->fetch_all_by_name($self->{'strain_name'})}; #the name should be unique for a strain
00125         #check that the individua returned isin the database
00126 
00127             if (defined $individual){
00128                 my $allele_features = $af_adaptor->fetch_all_by_Slice($self,$individual);
00129                 #warning("No strain genotype data available for Slice ".$self->name." and Strain ".$individual->name) if ! defined $allele_features->[0];
00130                 my $vf_ids = {}; #hash containing the relation vf_id->af
00131         $self->{'_strain'} = $individual;       
00132         map {defined $_->{'_variation_feature_id'} ? $vf_ids->{$_->{'_variation_feature_id'}} = $_ : ''
00133 } @{$allele_features};
00134 #       my $new_allele_features = $self->_filter_af_by_coverage($allele_features);
00135 #       $self->{'alleleFeatures'} = $new_allele_features;
00136         $self->{'alleleFeatures'} = $allele_features || [];
00137         $self->{'_vf_ids'} = $vf_ids;
00138         return $self;
00139         }
00140         else{ 
00141         warning("Strain ($self->{strain_name}) not in the database");
00142         return $self;
00143         }
00144     }
00145     else{
00146         warning("Not possible to retrieve IndividualAdaptor from the variation database");
00147         return '';
00148     }
00149     } else {
00150     warning("Not possible to retrieve VariationFeatureAdaptor from variation database");
00151     return '';
00152     }
00153 }
00154 
00155 =head2 _filter_af_by_coverage
00156 
00157     Arg [1]     : listref to Bio::EnsEMBL::Variation::AlleleFeatures  $allele_features
00158     Example     : my $new_list_allele_features = $strainSlice->_filter_af_by_coverage($allele_features);
00159     Description : For a list of allele features, gets a new list where they are filter depending on coverage
00160     ReturnType  : listref of Bio::EnsEMBL::Variation::AlleleFeature
00161     Exceptions  : none
00162     Caller      : internal function
00163 
00164 =cut
00165 
00166 sub _filter_af_by_coverage{
00167     my $self = shift;
00168     my $allele_features = shift;
00169 
00170     my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
00171 
00172     unless($variation_db) {
00173     warning("Variation database must be attached to core database to " .
00174         "retrieve variation information" );
00175     return '';
00176     }
00177     
00178     my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
00179     #this is ugly, but ReadCoverage is always defined in the positive strand
00180 
00181 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
00182 ###  passing 1 will only get you the coverage of level 1
00183 ###  by omitting the parameter we take into account all coverage regions 
00184 #    my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
00185     my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
00186     my $new_af;
00187     foreach my $af (@{$allele_features}){
00188     foreach my $rc (@{$rcs}){
00189         if ($af->start <= $rc->end and $af->start >= $rc->start){
00190         push @{$new_af}, $af;
00191         last;
00192         }
00193     }
00194     }
00195     
00196     return $new_af;
00197 }
00198 
00199 
00200 =head2 strain_name
00201 
00202     Arg [1]     : (optional) string $strain_name
00203     Example     : my $strain_name = $strainSlice->strain_name();
00204     Description : Getter/Setter for the name of the strain
00205     ReturnType  : string
00206     Exceptions  : none
00207     Caller      : general
00208 
00209 =cut
00210 
00211 sub strain_name{
00212    my $self = shift;
00213    if (@_){
00214        $self->{'strain_name'} = shift @_;
00215    }
00216    return $self->{'strain_name'};
00217 }
00218 
00219 
00220 =head2 display_Slice_name
00221 
00222     Args        : none
00223     Example     : my $strain_name = $strainSlice->display_Slice_name();
00224     Description : Getter for the name of the strain
00225     ReturnType  : string
00226     Exceptions  : none
00227     Caller      : webteam
00228 
00229 =cut
00230 
00231 sub display_Slice_name{
00232     my $self = shift;
00233 
00234     return $self->strain_name;
00235 }
00236 
00237 =head2 seq
00238 
00239   Arg [1]    : int $with_coverage (optional)
00240   Example    : print "SEQUENCE = ", $strainSlice->seq();
00241   Description: Returns the sequence of the region represented by this
00242                slice formatted as a string in the strain. If flag with_coverage
00243                is set to 1, returns sequence if there is coverage in the region
00244   Returntype : string
00245   Exceptions : none
00246   Caller     : general
00247 
00248 =cut
00249 
00250 sub seq {
00251   my $self = shift;
00252   my $with_coverage = shift;
00253 
00254   $with_coverage ||= 0;
00255 
00256   # special case for in-between (insert) coordinates
00257   return '' if($self->start() == $self->end() + 1);
00258 
00259   return $self->{'seq'} if($self->{'seq'});
00260 
00261   if($self->adaptor()) {
00262     my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
00263     my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice
00264     
00265     #apply all differences to the reference sequence
00266     #first, in case there are any indels, create the new sequence (containing the '-' bases)
00267    # sort edits in reverse order to remove complication of
00268     # adjusting downstream edits
00269     my @indels_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
00270 
00271     foreach my $vf (@indels_ordered){
00272     $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
00273     }
00274     
00275     #need to find coverage information if diffe
00276     # sort edits in reverse order to remove complication of
00277     # adjusting downstream edits
00278     my @variation_features_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
00279 
00280     foreach my $vf (@variation_features_ordered){
00281     $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
00282     }
00283     
00284     #need to find coverage information if different from reference
00285     my $indAdaptor = $self->adaptor->db->get_db_adaptor('variation')->get_IndividualAdaptor;
00286     my $ref_strain = $indAdaptor->get_reference_strain_name;
00287     $self->_add_coverage_information($reference_sequence) if ($with_coverage == 1 && $self->strain_name ne $ref_strain);
00288     return substr(${$reference_sequence},0,1) if ($self->length == 1); 
00289     return substr(${$reference_sequence},0,$self->expanded_length); #returns the reference sequence, applying the variationFeatures. Remove additional bases added due to indels
00290   }
00291 
00292   # no attached sequence, and no db, so just return Ns
00293   return 'N' x $self->length();
00294 }
00295 
00296 sub expanded_length() {
00297     my $self = shift;
00298     
00299     my $length = $self->SUPER::length();
00300     
00301     foreach my $af(@{$self->{'alleleFeatures'}}) {
00302         $length += $af->length_diff() if $af->length_diff > 0;
00303     }
00304     
00305     return $length;
00306 }
00307 
00308 
00309 
00310 sub _add_coverage_information{
00311     my $self = shift;
00312     my $reference_sequence = shift;
00313 
00314     my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
00315 
00316     unless($variation_db) {
00317     warning("Variation database must be attached to core database to " .
00318         "retrieve variation information" );
00319     return '';
00320     }
00321     
00322     my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
00323 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
00324 ###  passing 1 will only get you the coverage of level 1
00325 ###  by omitting the parameter we take into account all coverage regions 
00326 #    my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
00327     my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
00328     my $rcs_sorted;
00329     @{$rcs_sorted} = sort {$a->start <=> $b->start} @{$rcs} if ($self->strand == -1);
00330     $rcs = $rcs_sorted if ($self->strand == -1);
00331     my $start = 1;
00332     
00333     
00334     # wm2 - new code to mask sequence, instead starts with masked string
00335     # and unmasks seq where there is read coverage
00336     
00337     # get all length-changing vars
00338     my @indels_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
00339     
00340     my $masked_seq = '~' x length($$reference_sequence);
00341     
00342     foreach my $rc(@{$rcs}) {
00343         my ($start, $end) = ($rc->start, $rc->end);
00344         
00345         # adjust region for indels
00346         foreach my $indel(@indels_ordered) {
00347             next if $rc->start > $end;
00348             
00349             # if within RC region, only need adjust the end
00350             $start += $indel->length_diff unless $indel->start > $start;
00351             $end += $indel->length_diff;
00352         }
00353         
00354         # adjust coords for seq boundaries
00355         $start = 1 if $start < 1;
00356         $end = CORE::length($masked_seq) if $end > CORE::length($masked_seq);
00357         
00358         # now unmask the sequence using $$reference_sequence
00359         substr($masked_seq, $start - 1, $end - $start + 1) = substr($$reference_sequence, $start - 1, $end - $start + 1);
00360     }
00361     
00362     # wm2 - old code, starts with sequence and masks regions between read coverage - BUGGY
00363 #    foreach my $rc (@{$rcs}){
00364 #       $rc->start(1) if ($rc->start < 0); #if the region lies outside the boundaries of the slice
00365 #       $rc->end($self->end - $self->start + 1) if ($rc->end + $self->start > $self->end);
00366 #       
00367 #       warn "Adjusted: ", $rc->start, "-", $rc->end;
00368 #       
00369 #       warn "Covering from ", $start, " over ", ($rc->start - $start - 1), " bases";
00370 #       
00371 #        substr($$reference_sequence, $start-1,($rc->start - $start - 1),'~' x ($rc->start - $start - 1)) if ($rc->start - 1 > $start);
00372 #        $start = $rc->end;
00373 #
00374 #    }
00375 #    substr($$reference_sequence, $start, ($self->length - $start) ,'~' x ($self->length - $start)) if ($self->length -1 > $start);
00376     
00377     # copy the masked sequence to the reference sequence
00378     $$reference_sequence = $masked_seq;
00379 }
00380 
00381 
00382 =head2 get_AlleleFeature
00383 
00384     Arg[1]      : Bio::EnsEMBL::Variation::VariationFeature $vf
00385     Example     : my $af = $strainSlice->get_AlleleFeature($vf);
00386     Description : Returns the AlleleFeature object associated with the VariationFeature (if any)
00387     ReturnType  : Bio::EnsEMBL::Variation::AlleleFeature
00388     Exceptions  : none
00389     Caller      : general
00390 
00391 =cut
00392 
00393 sub get_AlleleFeature{
00394     my $self = shift;
00395     my $vf = shift;
00396     
00397     my $af;
00398     #look at the hash containing the relation vf_id->alleleFeature, if present, return object, otherwise, undef
00399     $af = $self->{'_vf_ids'}->{$vf->dbID} if (defined $self->{'_vf_ids'}->{$vf->dbID});
00400     return $af;
00401 }
00402 
00403 
00404 =head2 get_all_AlleleFeatures_Slice
00405 
00406     Arg[1]      : int $with_coverage (optional)
00407     Example     : my $af = $strainSlice->get_all_AlleleFeatures_Slice()
00408     Description : Gets all AlleleFeatures between the StrainSlice object and the Slice is defined.
00409                   If argument $with_coverage set to 1, returns only AF if they have coverage information
00410     ReturnType  : listref of Bio::EnsEMBL::Variation::AlleleFeature
00411     Exceptions  : none
00412     Caller      : general
00413 
00414 =cut
00415 
00416 sub get_all_AlleleFeatures_Slice{
00417     my $self = shift;
00418     my $with_coverage = shift;
00419 
00420     my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
00421 
00422     unless($variation_db) {
00423     warning("Variation database must be attached to core database to " .
00424         "retrieve variation information" );
00425     return '';
00426     }
00427     my $indAdaptor = $variation_db->get_IndividualAdaptor();
00428     my $ref_name =  $indAdaptor->get_reference_strain_name;
00429     return [] if ($self->strain_name eq $ref_name);
00430     $with_coverage ||= 0; #by default, get all AlleleFeatures
00431     if ($with_coverage == 1){
00432     my $new_allele_features = $self->_filter_af_by_coverage($self->{'alleleFeatures'});
00433     return $new_allele_features || [];
00434     }
00435 
00436     return $self->{'alleleFeatures'} || [];
00437 }
00438 
00439 =head2 get_all_differences_StrainSlice
00440 
00441     Arg[1]      : Bio::EnsEMBL::StrainSlice $ss
00442     Example     : my $differences = $strainSlice->get_all_differences_StrainSlice($ss)
00443     Description : Gets differences between 2 StrainSlice objects
00444     ReturnType  : listref of Bio::EnsEMBL::Variation::AlleleFeature
00445     Exceptions  : thrown on bad argument
00446     Caller      : general
00447 
00448 =cut
00449 
00450 sub get_all_differences_StrainSlice{
00451     my $self = shift;
00452     my $strainSlice = shift;
00453 
00454     if (!ref($strainSlice) || !$strainSlice->isa('Bio::EnsEMBL::StrainSlice')){
00455     throw('Bio::EnsEMBL::StrainSlice arg expected');
00456     }
00457     if ( @{$self->{'alleleFeatures'}} == 0 && @{$strainSlice->{'alleleFeatures'}} == 0){
00458     return undef; #there are no differences in any of the Strains
00459     
00460     }
00461     my $differences; #differences between strains
00462     if (@{$strainSlice->{'alleleFeatures'}} == 0){
00463     #need to create a copy of VariationFeature
00464     foreach my $difference (@{$self->{'alleleFeatures'}}){
00465         my %vf = %$difference;
00466         push @{$differences},bless \%vf,ref($difference);
00467     }
00468     }
00469     elsif (@{$self->{'alleleFeatures'}} == 0){
00470     #need to create a copy of VariationFeature, but changing the allele by the allele in the reference
00471     foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
00472         push @{$differences}, $strainSlice->_convert_difference($difference);
00473     }
00474     }
00475     else{
00476     #both strains have differences
00477     #create a hash with the differences in the self strain slice
00478     my %variation_features_self = map {$_->start => $_} @{$self->{'alleleFeatures'}};
00479     foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
00480         #there is no difference in the other strain slice, convert the allele
00481         if (!defined $variation_features_self{$difference->start}){
00482         push @{$differences},$strainSlice->_convert_difference($difference);
00483         }       
00484         else{
00485         #if it is defined and have the same allele, delete from the hash
00486         if ($variation_features_self{$difference->start}->allele_string eq $difference->allele_string){
00487             delete $variation_features_self{$difference->start};
00488         }
00489         }
00490     }   
00491     #and copy the differences that in the self
00492     foreach my $difference (values %variation_features_self){
00493         my %vf = %$difference;
00494         push @{$differences},bless \%vf,ref($difference);
00495     }
00496 
00497     }
00498     #need to map differences to the self 
00499     my $mapper = $self->mapper(); #now that we have the differences, map them in the StrainSlice
00500 #    print Dumper($mapper);
00501     my @results;
00502     foreach my $difference (@{$differences}){   
00503     @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice');
00504     #we can have 3 possibilities:
00505     #the difference is an insertion and when mapping returns the boundaries of the insertion in the StrainSlice
00506     if (@results == 2){
00507         #the first position in the result is the beginning of the insertion
00508         if($results[0]->start < $results[1]->start){
00509         $difference->start($results[0]->end+1);
00510         $difference->end($results[1]->start-1);
00511         }
00512         else{
00513         $difference->start($results[1]->end+1);
00514         $difference->end($results[0]->start-1);
00515         }
00516         $difference->strand($results[0]->strand);
00517     }
00518     else{
00519         #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate
00520         # or a Bio::EnsEMBL::Mapper::IndelCoordinate
00521 #       print "Difference: ",$difference->start, "-", $difference->end,"strand ",$difference->strand,"\n";
00522         $difference->start($results[0]->start);
00523         $difference->end($results[0]->end);
00524         $difference->strand($results[0]->strand);
00525     }
00526     }
00527 
00528     return $differences;
00529 }
00530 
00531 #for a given VariationFeatures, converts the allele into the reference allele and returns a new list with
00532 #the converted VariationFeatures
00533 sub _convert_difference{
00534     my $self = shift;
00535     my $difference = shift;
00536     my %new_vf = %$difference; #make a copy of the variationFeature
00537     #and change the allele with the one from the reference Slice
00538     $new_vf{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand);
00539     return bless \%new_vf,ref($difference);
00540 }
00541 
00542 =head2 sub_Slice
00543 
00544   Arg   1    : int $start
00545   Arg   2    : int $end
00546   Arge [3]   : int $strand
00547   Example    : none
00548   Description: Makes another StrainSlice that covers only part of this slice
00549                with the appropriate differences to the reference Slice
00550                If a slice is requested which lies outside of the boundaries
00551                of this function will return undef.  This means that
00552                behaviour will be consistant whether or not the slice is
00553                attached to the database (i.e. if there is attached sequence
00554                to the slice).  Alternatively the expand() method or the
00555                SliceAdaptor::fetch_by_region method can be used instead.
00556   Returntype : Bio::EnsEMBL::StrainSlice or undef if arguments are wrong
00557   Exceptions : thrown when trying to get the subSlice in the middle of a
00558                insertion
00559   Caller     : general
00560 
00561 =cut
00562 
00563 sub sub_Slice {
00564   my ( $self, $start, $end, $strand ) = @_;
00565   my $mapper = $self->mapper();
00566   #finally map from the Slice to the Strain
00567   my @results = $mapper->map_coordinates('StrainSlice',$start,$end,$strand,'StrainSlice');
00568   my $new_start;
00569   my $new_end;
00570   my $new_strand;
00571   my $new_seq;
00572  
00573   #Get need start and end for the subSlice of the StrainSlice
00574   my @results_ordered = sort {$a->start <=> $b->start} @results;
00575   $new_start = $results_ordered[0]->start();
00576   $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
00577   $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
00578   $new_end = $results_ordered[-1]->end();  #get last element of the array, the end of the slice
00579 
00580   my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand);
00581   $subSlice->{'strain_name'} = $self->{'strain_name'};
00582 
00583   my $new_variations; #reference to an array that will contain the variationFeatures in the new subSlice
00584   #update the VariationFeatures in the sub_Slice of the Strain
00585   my $vf_start;
00586   my $vf_end;
00587   my $offset = $subSlice->start - $self->start;
00588 
00589   foreach my $variationFeature (@{$self->{'alleleFeatures'}}){
00590       #calculate the new position of the variation_feature in the subSlice
00591       $vf_start = $variationFeature->start - $offset;
00592       $vf_end = $variationFeature->end - $offset;
00593       if ($vf_start  >= 1 and $vf_end <= $subSlice->length){
00594       #copy the variationFeature
00595       my %new_vf;
00596       %new_vf = %$variationFeature;
00597       #and shift to the new coordinates
00598       $new_vf{'start'} = $vf_start;
00599       $new_vf{'end'} = $vf_end; 
00600       my $test = bless \%new_vf, ref($variationFeature);
00601       push @{$new_variations}, $test;
00602       }
00603   }
00604   $subSlice->{'alleleFeatures'} = $new_variations;
00605   return $subSlice;
00606 
00607 }
00608 
00609 =head2 ref_subseq
00610 
00611   Arg  [1]   : int $startBasePair
00612                relative to start of slice, which is 1.
00613   Arg  [2]   : int $endBasePair
00614                relative to start of slice.
00615   Arg  [3]   : (optional) int $strand
00616                The strand of the slice to obtain sequence from. Default
00617                value is 1.
00618   Description: returns string of dna from reference sequence
00619   Returntype : txt
00620   Exceptions : end should be at least as big as start
00621                strand must be set
00622   Caller     : general
00623 
00624 =cut
00625 
00626 sub ref_subseq{
00627   my $self = shift;
00628   my $start = shift;
00629   my $end = shift;
00630   my $strand = shift;
00631   # special case for in-between (insert) coordinates
00632   return '' if($start == $end + 1);
00633 
00634   my $subseq;
00635   if($self->adaptor){
00636     my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
00637     $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
00638       ( $self, $start,
00639         $end, $strand )};
00640   } else {
00641     ## check for gap at the beginning and pad it with Ns
00642     if ($start < 1) {
00643       $subseq = "N" x (1 - $start);
00644       $start = 1;
00645     }
00646     $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
00647     ## check for gap at the end and pad it with Ns
00648     if ($end > $self->length()) {
00649       $subseq .= "N" x ($end - $self->length());
00650     }
00651     reverse_comp(\$subseq) if($strand == -1);
00652   }
00653   return $subseq;
00654 }
00655 
00656 =head2 subseq
00657 
00658   Arg  [1]   : int $startBasePair
00659                relative to start of slice, which is 1.
00660   Arg  [2]   : int $endBasePair
00661                relative to start of slice.
00662   Arg  [3]   : (optional) int $strand
00663                The strand of the slice to obtain sequence from. Default
00664                value is 1.
00665   Description: returns string of dna sequence
00666   Returntype : txt
00667   Exceptions : end should be at least as big as start
00668                strand must be set
00669   Caller     : general
00670 
00671 =cut
00672 
00673 sub subseq {
00674   my ( $self, $start, $end, $strand ) = @_;
00675 
00676   if ( $end+1 < $start ) {
00677     throw("End coord + 1 is less then start coord");
00678   }
00679 
00680   # handle 'between' case for insertions
00681   return '' if( $start == $end + 1);
00682 
00683   $strand = 1 unless(defined $strand);
00684 
00685   if ( $strand != -1 && $strand != 1 ) {
00686     throw("Invalid strand [$strand] in call to Slice::subseq.");
00687   }
00688 
00689   my $subseq;
00690   my $seq;
00691   if($self->adaptor){
00692 
00693 
00694       $seq = $self->seq;
00695       reverse_comp(\$seq) if ($strand == -1);
00696       $subseq = substr($seq,$start-1,$end - $start + 1);
00697   } 
00698   else {
00699       ## check for gap at the beginning and pad it with Ns
00700       if ($start < 1) {
00701       $subseq = "N" x (1 - $start);
00702       $start = 1;
00703       }
00704       $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
00705       ## check for gap at the end and pad it with Ns
00706     if ($end > $self->length()) {
00707     $subseq .= "N" x ($end - $self->length());
00708     }
00709       reverse_comp(\$subseq) if($strand == -1);
00710   }
00711   return $subseq;
00712   
00713 }
00714 
00715 
00716 sub mapper{
00717     my $self = shift;
00718    
00719     if (@_) {
00720     delete $self->{'mapper'};
00721     }
00722     if(!defined $self->{'mapper'}){
00723     #create the mapper between the Slice and StrainSlice
00724     my $mapper = Bio::EnsEMBL::Mapper->new('Slice','StrainSlice');
00725     #align with Slice
00726     #get all the VariationFeatures in the strain Slice, from start to end in the Slice
00727     my @variation_features_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
00728     
00729     my $start_slice = 1;
00730     my $end_slice;
00731     my $start_strain = 1;
00732     my $end_strain;
00733     my $length_allele;
00734     foreach my $variation_feature (@variation_features_ordered){
00735         #we have a insertion/deletion: marks the beginning of new slice move coordinates        
00736         if ($variation_feature->length_diff != 0){
00737         $length_allele = $variation_feature->length + $variation_feature->length_diff();        
00738         $end_slice = $variation_feature->start() - 1;
00739 
00740         if ($end_slice >= $start_slice){
00741             $end_strain = $end_slice - $start_slice + $start_strain;
00742             #add the sequence that maps
00743             $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'StrainSlice',$start_strain,$end_strain);
00744             #add the indel
00745             $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
00746             $start_strain = $end_strain + $length_allele + 1;
00747         }
00748         else{
00749             #add the indel
00750             $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
00751             $start_strain += $length_allele;
00752         }
00753         $start_slice = $end_slice + $variation_feature->length+ 1;
00754         }
00755     }
00756     if ($start_slice <= $self->length){
00757         $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'StrainSlice',$start_strain,$start_strain + $self->length - $start_slice);
00758     }
00759     $self->{'mapper'} = $mapper;
00760     }
00761     return $self->{'mapper'};
00762 }
00763 
00764 =head2 get_all_differences_Slice
00765 
00766     Description : DEPRECATED use get_all_AlleleFeatures instead
00767    
00768 =cut
00769 
00770 sub get_all_differences_Slice{
00771     my $self = shift;
00772     
00773     deprecate('Use get_all_differences_Slice instead');
00774     return $self->get_all_AlleleFeatures_Slice(@_);
00775 }
00776 
00777 =head2 get_all_VariationFeatures
00778 
00779     Arg[1]     : int $with_coverage (optional)
00780     Description :returns all alleleFeatures features on this slice. 
00781     ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
00782     Exceptions : none
00783     Caller     : contigview, snpview
00784 
00785 =cut
00786 
00787 sub get_all_VariationFeatures {
00788   my $self = shift;
00789   my $with_coverage = shift;
00790   $with_coverage ||= 0;
00791   return $self->get_all_AlleleFeatures_Slice($with_coverage);
00792 }
00793 
00794 =head2 get_original_seq_region_position
00795 
00796   Arg  [1]   : int $position
00797                relative to start of slice, which is 1.
00798   Description: Placeholder method - this method has no explicit use beyond
00799                providiing compatibility with AlignSlice. To map positions
00800                between the StrainSlice and the reference slice, use the
00801                mapper and its methods.
00802   Returntype : ($strainSlice, $seq_region_position), an array where the first
00803                element is a Bio::EnsEMBL::StrainSlice and the second one is the
00804                requested seq_region_position.
00805   Exceptions : none
00806   Caller     : general
00807 
00808 =cut
00809 
00810 sub get_original_seq_region_position {
00811     my $self = shift;
00812     my $position = shift;
00813     #coordinates in the AlignSlice and Slice are the same, so far will return the same Slice
00814     #and coordinate
00815     return ($self,$position);
00816 }
00817 
00818 
00819 =head2 remove_indels
00820 
00821     Args        : none
00822     Example     : $strainSlice->remove_indels();
00823     Description : Removes insertions and deletions from the allele features
00824                   of this object
00825     ReturnType  : none
00826     Exceptions  : none
00827     Caller      : webteam
00828 
00829 =cut
00830 
00831 sub remove_indels {
00832     my $self = shift;
00833     
00834     my @new_afs = grep { $_->variation->var_class ne 'in-del' } @{$self->{'alleleFeatures'}};
00835     
00836     $self->{'alleleFeatures'} = \@new_afs;
00837 }
00838 
00839 1;