Archive Ensembl HomeArchive Ensembl Home
StrainSliceAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2009 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices
00024 representing alternative assemblies
00025 
00026 =head1 SYNOPSIS
00027 
00028   my $slice =
00029     $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
00030 
00031   my $msc = Bio::EnsEMBL::MappedSliceContainer->new(-SLICE => $slice);
00032 
00033   # create a new strain slice adaptor and attach it to the MSC
00034   my $ssa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new($sa->db);
00035   $msc->set_StrainSliceAdaptor($ssa);
00036   
00037   # now attach strain
00038   $msc->attach_StrainSlice('Watson');
00039 
00040 =head1 DESCRIPTION
00041 
00042 NOTE: this code is under development and not fully functional nor tested
00043 yet.  Use only for development.
00044 
00045 This adaptor is a factory for creating MappedSlices representing
00046 strains and attaching them to a MappedSliceContainer. A mapper will be created
00047 to map between the reference slice and the common container slice coordinate
00048 system.
00049 
00050 =head1 METHODS
00051 
00052   new
00053   fetch_by_name
00054 
00055 =head1 REALTED MODULES
00056 
00057   Bio::EnsEMBL::MappedSlice
00058   Bio::EnsEMBL::MappedSliceContainer
00059   Bio::EnsEMBL::AlignStrainSlice
00060   Bio::EnsEMBL::StrainSlice
00061 
00062 =cut
00063 
00064 package Bio::EnsEMBL::DBSQL::StrainSliceAdaptor;
00065 
00066 use strict;
00067 use warnings;
00068 no warnings 'uninitialized';
00069 
00070 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00071 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00072 use Bio::EnsEMBL::MappedSlice;
00073 use Bio::EnsEMBL::Mapper;
00074 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
00075 
00076 our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
00077 
00078 
00079 =head2 new
00080 
00081   Example     : my $strain_slice_adaptor =
00082                   Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new;
00083   Description : Constructor.
00084   Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
00085   Exceptions  : none
00086   Caller      : general
00087   Status      : At Risk
00088               : under development
00089 
00090 =cut
00091 
00092 sub new {
00093   my $caller = shift;
00094 
00095   my $class = ref($caller) || $caller;
00096   my $self = $class->SUPER::new(@_);
00097   
00098   return $self;
00099 }
00100 
00101 
00102 =head2 fetch_by_name
00103 
00104   Arg[1]      : Bio::EnsEMBL::MappedSliceContainer $container - the container
00105                   to attach MappedSlices to
00106   Arg[2]      : String $name - the name of the strain to fetch
00107   Example     : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') };
00108   Description : Creates a MappedSlice representing a version of the container's
00109                 reference slice with variant alleles from the named strain
00110   Return type : listref of Bio::EnsEMBL::MappedSlice
00111   Exceptions  : thrown on wrong or missing arguments
00112   Caller      : general, Bio::EnsEMBL::MappedSliceContainer
00113   Status      : At Risk
00114               : under development
00115 
00116 =cut
00117 
00118 sub fetch_by_name {
00119   my $self = shift;
00120   my $container = shift;
00121   my $name = shift;
00122 
00123   # argueent check
00124   unless ($container and ref($container) and
00125           $container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
00126     throw("Need a MappedSliceContainer.");
00127   }
00128 
00129   unless ($name) {
00130     throw("Need a strain name.");
00131   }
00132 
00133   my $slice = $container->ref_slice;
00134 
00135   # get a connection to the variation DB
00136   my $variation_db = $self->db->get_db_adaptor('variation');
00137 
00138   unless($variation_db) {
00139     warning("Variation database must be attached to core database to retrieve variation information");
00140     return '';
00141   }
00142   
00143   # now get an allele feature adaptor
00144   my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
00145   
00146   # check we got it
00147   unless(defined $af_adaptor) {
00148     warning("Not possible to retrieve AlleleFeatureAdaptor from variation database");
00149     return '';
00150   }
00151   
00152   # now get an individual adaptor
00153   my $ind_adaptor = $variation_db->get_IndividualAdaptor;
00154   
00155   # check we got it
00156   unless(defined $ind_adaptor) {
00157     warning("Not possible to retrieve IndividualAdaptor from variation database");
00158     return '';
00159   }
00160   
00161   # fetch individual object for this strain name
00162   my $ind = shift @{$ind_adaptor->fetch_all_by_name($name)};
00163   
00164   # check we got a result
00165   unless(defined $ind) {
00166     warn("Strain ".$name." not found in the database");
00167     return '';
00168   }
00169   
00170   
00171   ## MAP STRAIN SLICE TO REF SLICE
00172   ################################
00173   
00174   # create a mapper
00175   my $mapper = Bio::EnsEMBL::Mapper->new('mapped_slice', 'ref_slice');
00176   
00177   # create a mapped_slice object  
00178   my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
00179     -ADAPTOR   => $self,
00180     -CONTAINER => $container,
00181     -NAME      => $slice->name."\#strain_$name",
00182   );
00183   
00184   # get the strain slice
00185   my $strain_slice = $slice->get_by_strain($ind->name);
00186   
00187   # get all allele features for this slice and individual
00188   #my @afs = sort {$a->start() <=> $b->start()} @{$af_adaptor->fetch_all_by_Slice($slice, $ind)};
00189   
00190   # get allele features with coverage info
00191   my $afs = $strain_slice->get_all_AlleleFeatures_Slice(1);
00192   
00193   # check we got some data
00194   #warning("No strain genotype data available for slice ".$slice->name." and strain ".$ind->name) if ! defined $afs[0];
00195   
00196   
00197   my $start_slice = $slice->start;
00198   my $start_strain = 1;
00199   my $sr_name = $slice->seq_region_name;
00200   #my $sr_name = 'ref_slice';
00201   my ($end_slice, $end_strain, $allele_length);
00202   
00203   my $indel_flag = 0;
00204   my $total_length_diff = 0;
00205   
00206   # check for AFs
00207   if(defined($afs) && scalar @$afs) {
00208   
00209     # go through each AF
00210     foreach my $af(@$afs) {
00211       
00212       # find out if it changes the length
00213       if($af->length_diff != 0) {
00214         
00215         $indel_flag = 1;
00216         $total_length_diff += $af->length_diff;
00217         
00218         # get the allele length
00219         $allele_length = $af->length + $af->length_diff();
00220         
00221         $end_slice = $slice->start + $af->start() - 2;
00222   
00223         if ($end_slice >= $start_slice){
00224             $end_strain = $end_slice - $start_slice + $start_strain;
00225             
00226             #add the sequence that maps
00227             $mapper->add_map_coordinates('mapped_slice', $start_strain, $end_strain, 1, $sr_name, $start_slice, $end_slice);
00228             
00229             #add the indel
00230             $mapper->add_indel_coordinates('mapped_slice', $end_strain+1, $end_strain+$allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
00231             
00232             $start_strain = $end_strain + $allele_length + 1;
00233         }
00234         
00235         else{
00236           
00237             #add the indel
00238             $mapper->add_indel_coordinates('mapped_slice', $end_strain+1,$end_strain + $allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
00239             
00240             $start_strain += $allele_length;
00241         }
00242         
00243         $start_slice = $end_slice + $af->length+ 1;
00244       }
00245     }
00246   }
00247   
00248   # add the remaining coordinates (or the whole length if no indels found)
00249   $mapper->add_map_coordinates('mapped_slice', $start_strain, $start_strain + ($slice->end - $start_slice), 1, $sr_name, $start_slice, $slice->end);
00250   
00251   # add the slice/mapper pair
00252   $mapped_slice->add_Slice_Mapper_pair($strain_slice, $mapper);  
00253   
00254   
00255   
00256   ## MAP REF_SLICE TO CONTAINER SLICE
00257   ###################################
00258   
00259   if($total_length_diff > 0) {
00260   
00261     # create a new mapper
00262     my $new_mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');
00263     
00264     # get existing pairs
00265     my @existing_pairs = $container->mapper->list_pairs('container', 1, $container->container_slice->length, 'container');
00266     my @new_pairs = $mapper->list_pairs('mapped_slice', 1, $strain_slice->length(), 'mapped_slice');
00267     
00268     # we need a list of indels (specifically inserts)
00269     my @indels;
00270     
00271     # go through existing first
00272     foreach my $pair(@existing_pairs) {
00273       
00274       if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
00275         my $indel;
00276         $indel->{'length_diff'} = ($pair->to->end - $pair->to->start) - ($pair->from->end - $pair->from->start);
00277         
00278         # we're only interested in inserts here, not deletions
00279         next unless $indel->{'length_diff'} > 0;
00280         
00281         $indel->{'ref_start'} = $pair->from->start;
00282         $indel->{'ref_end'} = $pair->from->end;
00283         $indel->{'length'} = $pair->to->end - $pair->to->start;
00284         
00285         push @indels, $indel;
00286       }
00287     }
00288     
00289     # now new ones
00290     foreach my $pair(@new_pairs) {
00291       
00292       if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
00293         my $indel;
00294         $indel->{'length_diff'} = ($pair->from->end - $pair->from->start) - ($pair->to->end - $pair->to->start);
00295         
00296         # we're only interested in inserts here, not deletions
00297         next unless $indel->{'length_diff'} > 0;
00298         
00299         $indel->{'ref_start'} = $pair->to->start;
00300         $indel->{'ref_end'} = $pair->to->end;
00301         $indel->{'length'} = $pair->from->end - $pair->from->start;
00302         
00303         push @indels, $indel;
00304       }
00305     }
00306     
00307     # sort them
00308     @indels = sort {
00309       $a->{'ref_start'} <=> $b->{'ref_start'} ||  # by position
00310       $b->{'length_diff'} <=> $a->{'length_diff'} # then by length diff so we only keep the longest
00311     } @indels;
00312     
00313     # clean them
00314     my @new_indels = ();
00315     my $p = $indels[0];
00316     push @new_indels, $indels[0] if scalar @indels;
00317     
00318     for my $i(1..$#indels) {
00319       my $c = $indels[$i];
00320       
00321       if($c->{'ref_start'} != $p->{'ref_start'} && $c->{'ref_end'} != $p->{'ref_end'}) {
00322         push @new_indels, $c;
00323         $p = $c;
00324       }
00325     }
00326     
00327     $start_slice = $slice->start;
00328     $start_strain = 1;
00329     $sr_name = $slice->seq_region_name;
00330     #$sr_name = 'ref_slice';
00331     
00332     foreach my $indel(@new_indels) {
00333       
00334       $end_slice = $indel->{'ref_end'};
00335       $end_strain = $start_strain + ($end_slice - $start_slice);
00336       
00337       $allele_length = $indel->{'length'} + $indel->{'length_diff'};
00338       
00339       $new_mapper->add_map_coordinates($sr_name, $start_slice, $end_slice, 1, 'container', $start_strain, $end_strain);
00340       
00341       $new_mapper->add_indel_coordinates($sr_name,$end_slice+1,$end_slice + $indel->{'length'}, 1, 'container', $end_strain+1, $end_strain+$allele_length);
00342       
00343       $start_strain = $end_strain + $allele_length + 1;
00344       $start_slice = $end_slice + $indel->{'length'} + 1;
00345     }
00346     
00347     $new_mapper->add_map_coordinates($sr_name, $start_slice, $slice->end, 1, 'container', $start_strain, $start_strain + ($slice->end - $start_slice));
00348     
00349     # replace the mapper with the new mapper
00350     $container->mapper($new_mapper);
00351     
00352     # change the container slice's length according to length diff
00353     $container->container_slice($container->container_slice->expand(undef, $total_length_diff, 1));
00354   }
00355   
00356   return [$mapped_slice];
00357 }
00358 
00359 
00360 1;
00361