Archive Ensembl HomeArchive Ensembl Home
MappedSliceContainer.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::MappedSliceContainer - container for mapped slices
00024 
00025 =head1 SYNOPSIS
00026 
00027   # get a reference slice
00028   my $slice =
00029     $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
00030 
00031   # create MappedSliceContainer based on the reference slice
00032   my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
00033 
00034   # set the adaptor for fetching AssemblySlices
00035   my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor;
00036   $msc->set_AssemblySliceAdaptor($asa);
00037 
00038   # add an AssemblySlice to your MappedSliceContainer
00039   $msc->attach_AssemblySlice('NCBIM36');
00040 
00041   foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) {
00042     print $mapped_slice->name, "\n";
00043 
00044     foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) {
00045       print "  ", &to_string($sf), "\n";
00046     }
00047   }
00048 
00049 =head1 DESCRIPTION
00050 
00051 NOTE: this code is under development and not fully functional nor tested
00052 yet.  Use only for development.
00053 
00054 A MappedSliceContainer holds a collection of one or more
00055 Bio::EnsEMBL::MappedSlices. It is based on a real reference slice and
00056 contains an artificial "container slice" which defines the common
00057 coordinate system used by all attached MappedSlices. There is also a
00058 mapper to convert coordinates between the reference and the container
00059 slice.
00060 
00061 Attaching MappedSlices to the container is delegated to adaptors
00062 (which act more as object factories than as traditional Ensembl db
00063 adaptors). The adaptors will also modify the container slice and
00064 associated mapper if required. This design allows us to keep the
00065 MappedSliceContainer generic and encapsulate the data source specific
00066 code in the adaptor/factory module.
00067 
00068 In the simplest use case, all required MappedSlices are attached to the
00069 MappedSliceContainer at once (by a single call to the adaptor). This
00070 object should also allow "hot-plugging" of MappedSlices (e.g. attach a
00071 MappedSlice representing a strain to a container that already contains a
00072 multi-species alignment). The methods for attaching new MappedSlice will
00073 be responsable to perform the necessary adjustments to coordinates and
00074 mapper on the existing MappedSlices.
00075 
00076 =head1 METHODS
00077 
00078   new
00079   set_adaptor
00080   get_adaptor
00081   set_AssemblySliceAdaptor
00082   get_AssemblySliceAdaptor
00083   set_AlignSliceAdaptor (not implemented yet)
00084   get_AlignSliceAdaptor (not implemented yet)
00085   set_StrainSliceAdaptor (not implemented yet)
00086   get_StrainSliceAdaptor (not implemented yet)
00087   attach_AssemblySlice
00088   attach_AlignSlice (not implemented yet)
00089   attach_StrainSlice (not implemented yet)
00090   get_all_MappedSlices
00091   sub_MappedSliceContainer (not implemented yet)
00092   ref_slice
00093   container_slice
00094   mapper
00095   expanded
00096 
00097 =head1 RELATED MODULES
00098 
00099   Bio::EnsEMBL::MappedSlice
00100   Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
00101   Bio::EnsEMBL::Compara::AlignSlice
00102   Bio::EnsEMBL::Compara::AlignSlice::Slice
00103   Bio::EnsEMBL::AlignStrainSlice
00104   Bio::EnsEMBL::StrainSlice
00105 
00106 =cut
00107 
00108 package Bio::EnsEMBL::MappedSliceContainer;
00109 
00110 use strict;
00111 use warnings;
00112 no warnings 'uninitialized';
00113 
00114 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00115 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00116 use Bio::EnsEMBL::CoordSystem;
00117 use Bio::EnsEMBL::Slice;
00118 use Bio::EnsEMBL::Mapper;
00119 
00120 
00121 # define avalable adaptormajs to use with this container
00122 my %adaptors = map { $_ => 1 } qw(assembly align strain);
00123 
00124 
00125 =head2 new
00126 
00127   Arg [SLICE]     : Bio::EnsEMBL::Slice $slice - the reference slice for this
00128                     container
00129   Arg [EXPANDED]  : (optional) Boolean $expanded - set expanded mode (default:
00130                     collapsed)
00131   Example     : my $slice = $slice_adaptor->fetch_by_region('chromosome', 1,
00132                   9000000, 9500000);
00133                 my $msc = Bio::EnsEMBL::MappedSliceContainer->new(
00134                     -SLICE    => $slice,
00135                     -EXPANDED => 1,
00136                 );
00137   Description : Constructor. See the general documentation of this module for 
00138                 details about this object. Note that the constructor creates an
00139                 empty container, so you'll have to attach MappedSlices to it to
00140                 be useful (this is usually done by an adaptor/factory).
00141   Return type : Bio::EnsEMBL::MappedSliceContainer
00142   Exceptions  : thrown on wrong or missing argument
00143   Caller      : general
00144   Status      : At Risk
00145               : under development
00146 
00147 =cut
00148 
00149 sub new {
00150   my $caller = shift;
00151   my $class = ref($caller) || $caller;
00152 
00153   my ($ref_slice, $expanded) = rearrange([qw(SLICE EXPANDED)], @_);
00154 
00155   # argument check
00156   unless ($ref_slice and ref($ref_slice) and
00157           ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
00158     throw("You must provide a reference slice.");
00159   }
00160 
00161   my $self = {};
00162   bless ($self, $class);
00163 
00164   # initialise object
00165   $self->{'ref_slice'} = $ref_slice;
00166   $self->{'expanded'} = $expanded || 0;
00167 
00168   $self->{'mapped_slices'} = [];
00169 
00170   # create the container slice
00171   $self->_create_container_slice($ref_slice);
00172 
00173   return $self;
00174 }
00175 
00176 
00177 #
00178 # Create an artificial slice which represents the common coordinate system used
00179 # for this MappedSliceContainer
00180 #
00181 sub _create_container_slice {
00182   my $self = shift;
00183   my $ref_slice = shift;
00184 
00185   # argument check
00186   unless ($ref_slice and ref($ref_slice) and
00187           ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
00188     throw("You must provide a reference slice.");
00189   }
00190 
00191   # create an artificial coordinate system for the container slice
00192   my $cs = Bio::EnsEMBL::CoordSystem->new(
00193       -NAME => 'container',
00194       -RANK => 1,
00195   );
00196 
00197   # Create a new artificial slice spanning your container. Initially this will
00198   # simply span your reference slice
00199   my $container_slice = Bio::EnsEMBL::Slice->new(
00200       -COORD_SYSTEM     => $cs,
00201       -START            => 1,
00202       -END              => $ref_slice->length,
00203       -STRAND           => 1,
00204       -SEQ_REGION_NAME  => 'container',
00205   );
00206 
00207   $self->{'container_slice'} = $container_slice;
00208 
00209   # Create an Mapper to map to/from the reference slice to the container coord
00210   # system.
00211   my $mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');
00212   
00213   $mapper->add_map_coordinates(
00214       $ref_slice->seq_region_name,
00215       $ref_slice->start,
00216       $ref_slice->end,
00217       1,
00218       $container_slice->seq_region_name,
00219       $container_slice->start,
00220       $container_slice->end,
00221   );
00222 
00223   $self->{'mapper'} = $mapper;
00224 }
00225 
00226 
00227 =head2 set_adaptor
00228 
00229   Arg[1]      : String $type - the type of adaptor to set
00230   Arg[2]      : Adaptor $adaptor - the adaptor to set
00231   Example     : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new;
00232                 $msc->set_adaptor('assembly', $adaptor);
00233   Description : Parameterisable wrapper for all methods that set adaptors (see
00234                 below).
00235   Return type : same as Arg 2
00236   Exceptions  : thrown on missing type
00237   Caller      : general
00238   Status      : At Risk
00239               : under development
00240 
00241 =cut
00242 
00243 sub set_adaptor {
00244   my $self = shift;
00245   my $type = shift;
00246   my $adaptor = shift;
00247 
00248   # argument check
00249   unless ($type and $adaptors{$type}) {
00250     throw("Missing or unknown adaptor type.");
00251   }
00252 
00253   $type = ucfirst($type);
00254   my $method = "set_${type}SliceAdaptor";
00255 
00256   return $self->$method($adaptor);
00257 }
00258 
00259 
00260 =head2 get_adaptor
00261 
00262   Arg[1]      : String $type - the type of adaptor to get
00263   Example     : my $assembly_slice_adaptor = $msc->get_adaptor('assembly');
00264   Description : Parameterisable wrapper for all methods that get adaptors (see
00265                 below).
00266   Return type : An adaptor for the requested type of MappedSlice.
00267   Exceptions  : thrown on missing type
00268   Caller      : general
00269   Status      : At Risk
00270               : under development
00271 
00272 =cut
00273 
00274 sub get_adaptor {
00275   my $self = shift;
00276   my $type = shift;
00277 
00278   # argument check
00279   unless ($type and $adaptors{$type}) {
00280     throw("Missing or unknown adaptor type.");
00281   }
00282 
00283   $type = ucfirst($type);
00284   my $method = "get_${type}SliceAdaptor";
00285 
00286   return $self->$method;
00287 }
00288 
00289 
00290 =head2 set_AssemblySliceAdaptor
00291 
00292   Arg[1]      : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor - the adaptor to set
00293   Example     : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new;
00294                 $msc->set_AssemblySliceAdaptor($adaptor);
00295   Description : Sets an AssemblySliceAdaptor for this container. The adaptor can
00296                 be used to attach MappedSlice for alternative assemblies.
00297   Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
00298   Exceptions  : thrown on wrong or missing argument
00299   Caller      : general, $self->get_adaptor
00300   Status      : At Risk
00301               : under development
00302 
00303 =cut
00304 
00305 sub set_AssemblySliceAdaptor {
00306   my $self = shift;
00307   my $assembly_slice_adaptor = shift;
00308 
00309   unless ($assembly_slice_adaptor and ref($assembly_slice_adaptor) and
00310     $assembly_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor')) {
00311       throw("Need a Bio::EnsEMBL::AssemblySliceAdaptor.");
00312   }
00313 
00314   $self->{'adaptors'}->{'AssemblySlice'} = $assembly_slice_adaptor;
00315 }
00316 
00317 
00318 =head2 get_AssemblySliceAdaptor
00319 
00320   Example     : my $assembly_slice_adaptor = $msc->get_AssemblySliceAdaptor;
00321   Description : Gets a AssemblySliceAdaptor from this container. The adaptor can
00322                 be used to attach MappedSlice for alternative assemblies.
00323   Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
00324   Exceptions  : thrown on wrong or missing argument
00325   Caller      : general, $self->get_adaptor
00326   Status      : At Risk
00327               : under development
00328 
00329 =cut
00330 
00331 sub get_AssemblySliceAdaptor {
00332   my $self = shift;
00333 
00334   unless ($self->{'adaptors'}->{'AssemblySlice'}) {
00335     warning("No AssemblySliceAdaptor attached to MappedSliceContainer.");
00336   }
00337 
00338   return $self->{'adaptors'}->{'AssemblySlice'};
00339 }
00340 
00341 
00342 # [todo]
00343 sub set_AlignSliceAdaptor {
00344   throw("Not implemented yet!");
00345 }
00346 
00347 
00348 # [todo]
00349 sub get_AlignSliceAdaptor {
00350   throw("Not implemented yet!");
00351 }
00352 
00353 
00354 # [todo]
00355 sub set_StrainSliceAdaptor {
00356   my $self = shift;
00357   my $strain_slice_adaptor = shift;
00358 
00359   unless ($strain_slice_adaptor and ref($strain_slice_adaptor) and
00360     $strain_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::StrainSliceAdaptor')) {
00361       throw("Need a Bio::EnsEMBL::StrainSliceAdaptor.");
00362   }
00363 
00364   $self->{'adaptors'}->{'StrainSlice'} = $strain_slice_adaptor;
00365 }
00366 
00367 
00368 # [todo]
00369 sub get_StrainSliceAdaptor {
00370   my $self = shift;
00371 
00372   unless ($self->{'adaptors'}->{'StrainSlice'}) {
00373     warning("No StrainSliceAdaptor attached to MappedSliceContainer.");
00374   }
00375 
00376   return $self->{'adaptors'}->{'StrainSlice'};
00377 }
00378 
00379 
00380 =head2 attach_AssemblySlice
00381 
00382   Arg[1]      : String $version - assembly version to attach
00383   Example     : $msc->attach_AssemblySlice('NCBIM36');
00384   Description : Attaches a MappedSlice for an alternative assembly to this
00385                 container.
00386   Return type : none
00387   Exceptions  : thrown on missing argument
00388   Caller      : general, Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
00389   Status      : At Risk
00390               : under development
00391 
00392 =cut
00393 
00394 sub attach_AssemblySlice {
00395   my $self = shift;
00396   my $version = shift;
00397 
00398   throw("Need a version.") unless ($version);
00399 
00400   my $asa = $self->get_AssemblySliceAdaptor;
00401   return unless ($asa);
00402 
00403   my @mapped_slices = @{ $asa->fetch_by_version($self, $version) };
00404 
00405   push @{ $self->{'mapped_slices'} }, @mapped_slices;
00406 }
00407 
00408 
00409 =head2 attach_StrainSlice
00410 
00411   Arg[1]      : String $strain - name of strain to attach
00412   Example     : $msc->attach_StrainSlice('Watson');
00413   Description : Attaches a MappedSlice for an alternative strain to this
00414                 container.
00415   Return type : none
00416   Exceptions  : thrown on missing argument
00417   Caller      : general, Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
00418   Status      : At Risk
00419               : under development
00420 
00421 =cut
00422 
00423 sub attach_StrainSlice {
00424   my $self = shift;
00425   my $strain = shift;
00426 
00427   throw("Need a strain.") unless ($strain);
00428 
00429   my $ssa = $self->get_StrainSliceAdaptor;
00430   return unless ($ssa);
00431 
00432   my @mapped_slices = @{ $ssa->fetch_by_name($self, $strain) };
00433 
00434   push @{ $self->{'mapped_slices'} }, @mapped_slices;
00435 }
00436 
00437 
00438 
00439 =head2 get_all_MappedSlices
00440 
00441   Example     : foreach my $mapped_slice (@{ $msc->get_all_MappedSlices }) {
00442                   print $mapped_slice->name, "\n";
00443                 }
00444   Description : Returns all MappedSlices attached to this container.
00445   Return type : listref of Bio::EnsEMBL::MappedSlice
00446   Exceptions  : none
00447   Caller      : general
00448   Status      : At Risk
00449               : under development
00450 
00451 =cut
00452 
00453 sub get_all_MappedSlices {
00454   my $self = shift;
00455   return $self->{'mapped_slices'};
00456 }
00457 
00458 
00459 # [todo]
00460 sub sub_MappedSliceContainer {
00461   throw("Not implemented yet!");
00462 }
00463 
00464 
00465 =head2 ref_slice
00466 
00467   Arg[1]      : (optional) Bio::EnsEMBL::Slice - the reference slice to set
00468   Example     : my $ref_slice = $mapped_slice_container->ref_slice;
00469                 print "This MappedSliceContainer is based on the reference
00470                   slice ", $ref_slice->name, "\n";
00471   Description : Getter/setter for the reference slice.
00472   Return type : Bio::EnsEMBL::Slice
00473   Exceptions  : thrown on wrong argument type
00474   Caller      : general
00475   Status      : At Risk
00476               : under development
00477 
00478 =cut
00479 
00480 sub ref_slice {
00481   my $self = shift;
00482   
00483   if (@_) {
00484     my $slice = shift;
00485     
00486     unless (ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
00487       throw("Need a Bio::EnsEMBL::Slice.");
00488     }
00489     
00490     $self->{'ref_slice'} = $slice;
00491   }
00492 
00493   return $self->{'ref_slice'};
00494 }
00495 
00496 
00497 =head2 container_slice
00498 
00499   Arg[1]      : (optional) Bio::EnsEMBL::Slice - the container slice to set
00500   Example     : my $container_slice = $mapped_slice_container->container_slice;
00501                 print "The common slice used by this MappedSliceContainer is ",
00502                   $container_slice->name, "\n";
00503   Description : Getter/setter for the container slice. This is an artificial
00504                 slice which defines the common coordinate system used by the
00505                 MappedSlices attached to this container.
00506   Return type : Bio::EnsEMBL::Slice
00507   Exceptions  : thrown on wrong argument type
00508   Caller      : general
00509   Status      : At Risk
00510               : under development
00511 
00512 =cut
00513 
00514 sub container_slice {
00515   my $self = shift;
00516   
00517   if (@_) {
00518     my $slice = shift;
00519     
00520     unless (ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
00521       throw("Need a Bio::EnsEMBL::Slice.");
00522     }
00523     
00524     $self->{'container_slice'} = $slice;
00525   }
00526 
00527   return $self->{'container_slice'};
00528 }
00529 
00530 
00531 =head2 mapper
00532 
00533   Arg[1]      : (optional) Bio::EnsEMBL::Mapper - the mapper to set
00534   Example     : my $mapper = Bio::EnsEMBL::Mapper->new('ref', 'mapped');
00535                 $mapped_slice_container->mapper($mapper);
00536   Description : Getter/setter for the mapper to map between reference slice and
00537                 the artificial container coord system.
00538   Return type : Bio::EnsEMBL::Mapper
00539   Exceptions  : thrown on wrong argument type
00540   Caller      : internal, Bio::EnsEMBL::MappedSlice->AUTOLOAD
00541   Status      : At Risk
00542               : under development
00543 
00544 =cut
00545 
00546 sub mapper {
00547   my $self = shift;
00548   
00549   if (@_) {
00550     my $mapper = shift;
00551     
00552     unless (ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) {
00553       throw("Need a Bio::EnsEMBL::Mapper.");
00554     }
00555     
00556     $self->{'mapper'} = $mapper;
00557   }
00558 
00559   return $self->{'mapper'};
00560 }
00561 
00562 
00563 =head2 expanded
00564 
00565   Arg[1]      : (optional) Boolean - expanded mode to set
00566   Example     : if ($mapped_slice_container->expanded) {
00567                   # do more elaborate mapping than in collapsed mode
00568                   [...]
00569                 }
00570   Description : Getter/setter for expanded mode.
00571                 
00572                 By default, MappedSliceContainer use collapsed mode, which
00573                 means that no inserts in the reference sequence are allowed
00574                 when constructing the MappedSlices. in this mode, the
00575                 mapped_slice artificial coord system will be identical with the
00576                 ref_slice coord system.
00577                 
00578                 By setting expanded mode, you allow inserts in the reference
00579                 sequence.
00580   Return type : Boolean
00581   Exceptions  : none
00582   Caller      : general
00583   Status      : At Risk
00584               : under development
00585 
00586 =cut
00587 
00588 sub expanded {
00589   my $self = shift;
00590   $self->{'expanded'} = shift if (@_);
00591   return $self->{'expanded'};
00592 }
00593 
00594 =head2 seq
00595 
00596   Example     : my $seq = $container->seq()
00597   Description : Retrieves the expanded sequence of the artificial container
00598                 slice, including "-" characters where there are inserts in any
00599                 of the attached mapped slices.
00600   Return type : String
00601   Exceptions  : none
00602   Caller      : general
00603   Status      : At Risk
00604               : under development
00605 
00606 =cut
00607 
00608 sub seq {
00609   my $self = shift;
00610   
00611   my $container_seq = '';
00612   
00613   # check there's a mapper
00614   if(defined($self->mapper)) {
00615     my $start = 0;
00616     my $slice = $self->ref_slice();
00617     my $seq = $slice->seq();
00618     
00619     foreach my $coord($self->mapper->map_coordinates($slice->seq_region_name, $slice->start, $slice->end, $slice->strand, 'ref_slice')) {
00620       # if it is a normal coordinate insert sequence
00621       if(!$coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate')) {
00622         $container_seq .= substr($seq, $start, $coord->length());
00623         $start += $coord->length;
00624       }
00625       
00626       # if it is a gap or indel insert "-"
00627       else {
00628         $container_seq .= '-' x $coord->length();
00629       }
00630     }
00631   }
00632   
00633   return $container_seq;
00634 }
00635 
00636 
00637 1;
00638