Archive Ensembl HomeArchive Ensembl Home
AlignStrainSlice.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::AlignStrainSlice - Represents the slice of the genome aligned with certain strains (applying the variations/indels)
00024 
00025 =head1 SYNOPSIS
00026 
00027   $sa = $db->get_SliceAdaptor;
00028 
00029   $slice =
00030     $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
00031 
00032   $strainSlice1 = $slice->get_by_Strain($strain_name1);
00033   $strainSlice2 = $slice->get_by_Strain($strain_name2);
00034 
00035   my @strainSlices;
00036   push @strainSlices, $strainSlice1;
00037   push @strainSlices, $strainSlice2;
00038 
00039   $alignSlice = Bio::EnsEMBL::AlignStrainSlice->new(
00040     -SLICE   => $slice,
00041     -STRAINS => \@strainSlices
00042   );
00043 
00044   # Get coordinates of variation in alignSlice
00045   my $alleleFeatures = $strainSlice1->get_all_AlleleFeature_Slice();
00046 
00047   foreach my $af ( @{$alleleFeatures} ) {
00048     my $new_feature = $alignSlice->alignFeature( $af, $strainSlice1 );
00049     print( "Coordinates of the feature in AlignSlice are: ",
00050       $new_feature->start, "-", $new_feature->end, "\n" );
00051   }
00052 
00053 =head1 DESCRIPTION
00054 
00055 A AlignStrainSlice object represents a region of a genome align for
00056 certain strains.  It can be used to align certain strains to a reference
00057 slice.
00058 
00059 =head1 METHODS
00060 
00061 =cut
00062 
00063 package Bio::EnsEMBL::AlignStrainSlice;
00064 use strict;
00065 
00066 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00067 use Bio::EnsEMBL::Mapper;
00068 use Bio::EnsEMBL::Mapper::RangeRegistry;
00069 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
00070 
00071 =head2 new
00072 
00073     Arg[1]      : Bio::EnsEMBL::Slice $Slice
00074     Arg[2]      : listref of Bio::EnsEMBL::StrainSlice $strainSlice
00075     Example     : push @strainSlices, $strainSlice1;
00076                   push @strainSlices, $strainSlice2;
00077                   .....
00078                   push @strainSlices, $strainSliceN;
00079                   $alignStrainSlice = Bio::EnsEMBL::AlignStrainSlice->new(-SLICE => $slice,
00080                                       -STRAIN => \@strainSlices);
00081     Description : Creates a new Bio::EnsEMBL::AlignStrainSlice object that will contain a mapper between
00082                   the Slice object, plus all the indels from the different Strains
00083     ReturnType  : Bio::EnsEMBL::AlignStrainSlice
00084     Exceptions  : none
00085     Caller      : general
00086 
00087 =cut
00088 
00089 sub new{
00090     my $caller = shift;
00091     my $class = ref($caller) || $caller;
00092 
00093     my ($slice, $strainSlices) = rearrange([qw(SLICE STRAINS)],@_);
00094 
00095     #check that both StrainSlice and Slice are identical (must have been defined in the same slice)
00096     foreach my $strainSlice (@{$strainSlices}){
00097     if (($strainSlice->start != $slice->start) || ($strainSlice->end != $slice->end) || ($strainSlice->seq_region_name ne $slice->seq_region_name)){
00098         warning("Not possible to create Align object from different Slices");
00099         return [];
00100     }
00101     }
00102 
00103     return bless{'slice' => $slice,
00104          'strains' => $strainSlices}, $class;
00105 }
00106 
00107 =head2 alignFeature
00108 
00109     Arg[1]      : Bio::EnsEMBL::Feature $feature
00110     Arg[2]      : Bio::EnsEMBL::StrainSlice $strainSlice
00111     Example     : $new_feature = $alignSlice->alignFeature($feature, $strainSlice);
00112     Description : Creates a new Bio::EnsEMBL::Feature object that aligned to 
00113                   the AlignStrainSlice object.
00114     ReturnType  : Bio::EnsEMBL::Feature
00115     Exceptions  : none
00116     Caller      : general
00117 
00118 =cut
00119 
00120 sub alignFeature{
00121     my $self = shift;
00122     my $feature = shift;
00123 
00124     #check that the object is a Feature
00125     if (!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')){ 
00126     throw("Bio::EnsEMBL::Feature object expected");
00127     }
00128     #and align it to the AlignStrainSlice object
00129     my $mapper_strain = $self->mapper();
00130 
00131     my @results;
00132   
00133     if ($feature->start > $feature->end){
00134     #this is an Indel, map it with the special method
00135     @results = $mapper_strain->map_indel('Slice',$feature->start, $feature->end, $feature->strand,'Slice');
00136     #and modify the coordinates according to the length of the indel
00137     $results[0]->end($results[0]->start + $feature->length_diff -1);
00138     }
00139     else{
00140     @results = $mapper_strain->map_coordinates('Slice',$feature->start, $feature->end, $feature->strand,'Slice');
00141      }
00142     #get need start and end of the new feature, aligned ot AlignStrainSlice
00143     my @results_ordered = sort {$a->start <=> $b->start} @results;
00144 
00145     my %new_feature = %$feature; #make a shallow copy of the Feature
00146     $new_feature{'start'}= $results_ordered[0]->start();
00147     $new_feature{'end'} = $results_ordered[-1]->end();  #get last element of the array, the end of the slice
00148 
00149     return bless \%new_feature, ref($feature);
00150     
00151 }
00152 
00153 
00154 #getter for the mapper between the Slice and the different StrainSlice objects
00155 sub mapper{
00156     my $self = shift;
00157     
00158     if (!defined $self->{'mapper'}){
00159     #get the alleleFeatures in all the strains
00160     if (!defined $self->{'indels'}){
00161         #when the list of indels is not defined, get them
00162         $self->{'indels'} = $self->_get_indels();
00163     }
00164     my $indels = $self->{'indels'}; #gaps in reference slice
00165     my $mapper = Bio::EnsEMBL::Mapper->new('Slice', 'AlignStrainSlice');
00166     my $start_slice = 1;
00167     my $end_slice;
00168     my $start_align = 1;
00169     my $end_align;
00170     my $length_indel = 0;
00171     my $length_acum_indel = 0;
00172     foreach my $indel (@{$indels}){
00173         $end_slice = $indel->[0] - 1;
00174         $end_align = $indel->[0] - 1 + $length_acum_indel; #we must consider length previous indels
00175 
00176         $length_indel = $indel->[1] - $indel->[0] + 1;
00177         
00178 
00179         $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'AlignStrainSlice',$start_align,$end_align);
00180         
00181         $mapper->add_indel_coordinates('Slice',$end_slice + 1,$end_slice,1,'AlignStrainSlice',$end_align + 1,$end_align + $length_indel);
00182         $start_slice = $end_slice + 1;
00183         $start_align = $indel->[1] + 1 + $length_acum_indel; #we must consider legnth previous indels
00184         
00185         $length_acum_indel += $length_indel;
00186     }
00187     if ($start_slice <= $self->length){
00188         $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'AlignStrainSlice',$start_align,$start_align + $self->length - $start_slice)
00189     }
00190     $self->{'mapper'} = $mapper;
00191     
00192     }
00193     return $self->{'mapper'};
00194 }
00195 
00196 #returns the length of the AlignSlice: length of the Slice plus the gaps
00197 sub length{
00198     my $self = shift;
00199     my $length;
00200     if (!defined $self->{'indels'}){
00201     #when the list of indels is not defined, get them
00202     $self->{'indels'} = $self->_get_indels();   
00203     }
00204     $length = $self->{'slice'}->length;
00205     map {$length += ($_->[1] - $_->[0] + 1)} @{$self->{'indels'}};
00206     return $length;
00207 }
00208 
00209 =head2 strains
00210 
00211   Args       : None
00212   Description: Returns list with all strains used to
00213                define this AlignStrainSlice object
00214   Returntype : listref of Bio::EnsEMBL::StrainSlice objects
00215   Exceptions : none
00216   Caller     : general
00217 
00218 =cut
00219 
00220 sub strains{
00221     my $self = shift;
00222 
00223     return $self->{'strains'};
00224 }
00225 
00226 =head2 Slice
00227 
00228   Args       : None
00229   Description: Returns slice where the AlignStrainSlice
00230                is defined
00231   Returntype : Bio::EnsEMBL::Slice object
00232   Exceptions : none
00233   Caller     : general
00234 
00235 =cut
00236 
00237 sub Slice{
00238     my $self = shift;
00239     return $self->{'slice'};
00240 }
00241 #method to retrieve, in order, a list with all the indels in the different strains
00242 sub _get_indels{
00243     my $self = shift;
00244     
00245     #go throuh all the strains getting ONLY the indels (length_diff <> 0)
00246     my @indels;
00247     foreach my $strainSlice (@{$self->strains}){
00248     my $differences = $strainSlice->get_all_AlleleFeatures_Slice(); #need to check there are differences....
00249     foreach my $af (@{$differences}){
00250         #if length is 0, but is a -, it is still a gap in the strain
00251         if (($af->length_diff != 0) || ($af->length_diff == 0 && $af->allele_string =~ /-/)){
00252         push @indels, $af;
00253         }
00254     }
00255     }
00256     #need to overlap the gaps using the RangeRegistry module
00257     my $range_registry = Bio::EnsEMBL::Mapper::RangeRegistry->new();
00258     foreach my $indel (@indels){
00259     #in the reference and the strain there is a gap
00260     $range_registry->check_and_register(1,$indel->start,$indel->start) if ($indel->length_diff == 0);
00261     #deletion in reference slice
00262     $range_registry->check_and_register(1,$indel->start, $indel->end ) if ($indel->length_diff < 0);
00263     #insertion in reference slice
00264     $range_registry->check_and_register(1,$indel->start,$indel->start + $indel->length_diff - 1) if ($indel->length_diff > 0);
00265     }
00266     #and return all the gap coordinates....
00267     return $range_registry->get_ranges(1);
00268 }
00269 
00270 =head2 get_all_Slices
00271 
00272   Args       : none
00273   Description: This Slice is made of several Bio::EnsEMBL::StrainSlices
00274                sequence. This method returns these StrainSlices (or part of
00275                them) with the original coordinates 
00276   Returntype : listref of Bio::EnsEMBL::StrainSlice objects
00277   Exceptions : end should be at least as big as start
00278   Caller     : general
00279 
00280 =cut
00281 
00282 sub get_all_Slices {
00283   my $self = shift;
00284 
00285   my @strains;
00286   #add the reference strain
00287   my $dbVar = $self->Slice->adaptor->db->get_db_adaptor('variation');
00288   unless($dbVar) {
00289     warning("Variation database must be attached to core database to " .
00290         "retrieve variation information" );
00291     return '';
00292     }
00293   my $indAdaptor = $dbVar->get_IndividualAdaptor();
00294   my $ref_name =  $indAdaptor->get_reference_strain_name;
00295   my $ref_strain = Bio::EnsEMBL::StrainSlice->new(
00296                       -START   => $self->Slice->{'start'},
00297                       -END     => $self->Slice->{'end'},
00298                       -STRAND  => $self->Slice->{'strand'},
00299                       -ADAPTOR => $self->Slice->adaptor(),
00300                       -SEQ    => $self->Slice->{'seq'},
00301                       -SEQ_REGION_NAME => $self->Slice->{'seq_region_name'},
00302                       -SEQ_REGION_LENGTH => $self->Slice->{'seq_region_length'},
00303                       -COORD_SYSTEM    => $self->Slice->{'coord_system'},
00304                       -STRAIN_NAME     => $ref_name,
00305                        );
00306   #this is a fake reference alisce, should not contain any alleleFeature
00307   undef $ref_strain->{'alleleFeatures'};
00308   
00309   push @strains, @{$self->strains};
00310   my $new_feature;
00311   my $indel;
00312   my $aligned_features;
00313   my $indels = (); #reference to a hash containing indels in the different strains
00314   #we need to realign all Features in the different Slices and add '-' in the reference Slice
00315   foreach my $strain (@{$self->strains}){
00316       foreach my $af (@{$strain->get_all_AlleleFeatures_Slice()}){
00317       $new_feature = $self->alignFeature($af); #align feature in AlignSlice coordinates
00318       push @{$aligned_features},$new_feature if($new_feature->seq_region_start <= $strain->end); #some features might map outside slice
00319       if ($af->start != $af->end){ #an indel, need to add to the reference, and realign in the strain        
00320               #make a shallow copy of the indel - clear it first!
00321           $indel = undef;
00322           %{$indel} = %{$new_feature};
00323           bless $indel, ref($new_feature);
00324           $indel->allele_string('-');
00325           push @{$indels},$indel; #and include in the list of potential indels
00326       }
00327       }
00328       next if (!defined $aligned_features);
00329       undef $strain->{'alleleFeatures'}; #remove all features before adding new aligned features
00330       push @{$strain->{'alleleFeatures'}}, @{$aligned_features};
00331       undef $aligned_features;
00332   }  
00333   push @strains, $ref_strain;
00334   #need to add indels in the different strains, if not present
00335   if (defined $indels){
00336       foreach my $strain (@strains){
00337       #inlcude the indels in the StrainSlice object
00338       push @{$strain->{'alignIndels'}},@{$indels};
00339       }
00340   }
00341   return \@strains;
00342 }
00343 
00344 1;