Archive Ensembl HomeArchive Ensembl Home
AssemblyMapperAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
00024 
00025 =head1 SYNOPSIS
00026 
00027   use Bio::EnsEMBL::Registry;
00028 
00029   Bio::EnsEMBL::Registry->load_registry_from_db(
00030     -host => 'ensembldb.ensembl.org',
00031     -user => 'anonymous'
00032   );
00033 
00034   $asma = Bio::EnsEMBL::Registry->get_adaptor( "human", "core",
00035     "assemblymapper" );
00036 
00037   $csa = Bio::EnsEMBL::Registry->get_adaptor( "human", "core",
00038     "coordsystem" );
00039 
00040   my $chr33_cs = $csa->fetch_by_name( 'chromosome', 'NCBI33' );
00041   my $chr34_cs = $csa->fetch_by_name( 'chromosome', 'NCBI34' );
00042   my $ctg_cs   = $csa->fetch_by_name('contig');
00043   my $clone_cs = $csa->fetch_by_name('clone');
00044 
00045   my $chr_ctg_mapper =
00046     $asma->fetch_by_CoordSystems( $chr33_cs, $ctg_cs );
00047 
00048   my $ncbi33_ncbi34_mapper =
00049     $asm_adptr->fetch_by_CoordSystems( $chr33, $chr34 );
00050 
00051   my $ctg_clone_mapper =
00052     $asm_adptr->fetch_by_CoordSystems( $ctg_cs, $clone_cs );
00053 
00054 
00055 =head1 DESCRIPTION
00056 
00057 Adaptor for handling Assembly mappers.  This is a I<Singleton> class.
00058 ie: There is only one per database (C<DBAdaptor>).
00059 
00060 This is used to retrieve mappers between any two coordinate systems
00061 whose makeup is described by the assembly table.  Currently one step
00062 (explicit) and two step (implicit) pairwise mapping is supported.  In
00063 one-step mapping an explicit relationship between the coordinate systems
00064 is defined in the assembly table.  In two-step 'chained' mapping no
00065 explicit mapping is present but the coordinate systems must share a
00066 common mapping to an intermediate coordinate system.
00067 
00068 =head1 METHODS
00069 
00070 =cut
00071 
00072 package Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor;
00073 use vars qw(@ISA);
00074 use strict;
00075 
00076 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
00077 use Bio::EnsEMBL::AssemblyMapper;
00078 use Bio::EnsEMBL::ChainedAssemblyMapper;
00079 use Bio::EnsEMBL::TopLevelAssemblyMapper;
00080 
00081 use Bio::EnsEMBL::Utils::Cache; #CPAN LRU cache
00082 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
00083 #use Bio::EnsEMBL::Utils::Exception qw(deprecate throw);
00084 use Bio::EnsEMBL::Utils::SeqRegionCache;
00085 
00086 use integer; #do proper arithmetic bitshifts
00087 
00088 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
00089 
00090 
00091 my $CHUNKFACTOR = 20;  # 2^20 = approx. 10^6
00092 
00093 =head2 new
00094 
00095   Arg [1]    : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor for
00096                the database this assembly mapper is using.
00097   Example    : my $asma = new Bio::EnsEMBL::AssemblyMapperAdaptor($dbadaptor);
00098   Description: Creates a new AssemblyMapperAdaptor object
00099   Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
00100   Exceptions : none
00101   Caller     : Bio::EnsEMBL::DBSQL::DBAdaptor
00102   Status     : Stable
00103 
00104 =cut
00105 
00106 sub new {
00107   my($class, $dbadaptor) = @_;
00108 
00109   my $self = $class->SUPER::new($dbadaptor);
00110 
00111   $self->{'_asm_mapper_cache'} = {};
00112 
00113   # use a shared cache (for this database) that contains info about
00114   # seq regions
00115   my $seq_region_cache = $self->db->get_SeqRegionCache();
00116   $self->{'sr_name_cache'} = $seq_region_cache->{'name_cache'};
00117   $self->{'sr_id_cache'}   = $seq_region_cache->{'id_cache'};
00118 
00119   return $self;
00120 }
00121 
00122 
00123 
00124 =head2  cache_seq_ids_with_mult_assemblys
00125 
00126   Example    : $self->adaptor->cache_seq_ids_with_mult_assemblys();
00127   Description: Creates a hash of the component seq region ids that
00128                map to more than one assembly from the assembly table.
00129   Retruntype : none
00130   Exceptions : none
00131   Caller     : AssemblyMapper, ChainedAssemblyMapper
00132   Status     : At Risk
00133 
00134 =cut
00135 
00136 sub cache_seq_ids_with_mult_assemblys{
00137   my $self = shift;
00138   my %multis;
00139 
00140   return if (defined($self->{'multi_seq_ids'}));
00141 
00142   $self->{'multi_seq_ids'} = {};
00143 
00144   my $sql = qq(
00145   SELECT    sra.seq_region_id
00146   FROM      seq_region_attrib sra,
00147             attrib_type at,
00148             seq_region sr,
00149             coord_system cs
00150   WHERE     sra.attrib_type_id = at.attrib_type_id
00151     AND     code = "MultAssem"
00152     AND     sra.seq_region_id = sr.seq_region_id
00153     AND     sr.coord_system_id = cs.coord_system_id
00154     AND     cs.species_id = ?);
00155 
00156   my $sth = $self->prepare($sql);
00157 
00158   $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
00159 
00160   $sth->execute();
00161 
00162   my ($seq_region_id);
00163 
00164   $sth->bind_columns(\$seq_region_id);
00165 
00166   while($sth->fetch()) {
00167     $self->{'multi_seq_ids'}->{$seq_region_id} = 1;
00168   }
00169   $sth->finish;
00170 }
00171 
00172 
00173 =head2 fetch_by_CoordSystems
00174 
00175   Arg [1]    : Bio::EnsEMBL::CoordSystem $cs1
00176                One of the coordinate systems to retrieve the mapper
00177                between
00178   Arg [2]    : Bio::EnsEMBL::CoordSystem $cs2
00179                The other coordinate system to map between
00180   Description: Retrieves an Assembly mapper for two coordinate
00181                systems whose relationship is described in the
00182                assembly table.
00183 
00184                The ordering of the coodinate systems is arbitrary.
00185                The following two statements are equivalent:
00186                $mapper = $asma->fetch_by_CoordSystems($cs1,$cs2);
00187                $mapper = $asma->fetch_by_CoordSystems($cs2,$cs1);
00188   Returntype : Bio::EnsEMBL::AssemblyMapper
00189   Exceptions : wrong argument types
00190   Caller     : general
00191   Status     : Stable
00192 
00193 =cut
00194 
00195 sub fetch_by_CoordSystems {
00196   my $self = shift;
00197   my $cs1  = shift;
00198   my $cs2  = shift;
00199 
00200   if(!ref($cs1) || !$cs1->isa('Bio::EnsEMBL::CoordSystem')) {
00201     throw("cs1 argument must be a Bio::EnsEMBL::CoordSystem.");
00202   }
00203   if(!ref($cs2) || !$cs2->isa('Bio::EnsEMBL::CoordSystem')) {
00204     throw("cs2 argument must be a Bio::EnsEMBL::CoordSystem.");
00205   }
00206 
00207 #  if($cs1->equals($cs2)) {
00208 #    throw("Cannot create mapper between same coord systems: " .
00209 #          $cs1->name . " " . $cs1->version);
00210 #  }
00211 
00212   if($cs1->is_top_level()) {
00213     return Bio::EnsEMBL::TopLevelAssemblyMapper->new($self, $cs1, $cs2);
00214   }
00215   if($cs2->is_top_level()) {
00216     return Bio::EnsEMBL::TopLevelAssemblyMapper->new($self, $cs2, $cs1);
00217   }
00218 
00219   my $csa = $self->db->get_CoordSystemAdaptor();
00220 
00221   #retrieve the shortest possible mapping path between these systems
00222   my @mapping_path = @{$csa->get_mapping_path($cs1,$cs2)};
00223 
00224   if(!@mapping_path) {
00225 
00226     # It is perfectly fine not to have a mapping. No warning needed really
00227     # Just check the return code!!
00228 
00229 #    warning(
00230 #      "There is no mapping defined between these coord systems:\n" .
00231 #      $cs1->name() . " " . $cs1->version() . " and " . $cs2->name() . " " .
00232 #      $cs2->version()
00233 #    );
00234     return undef;
00235   }
00236 
00237   my $key = join(':', map({defined($_)?$_->dbID():"-"} @mapping_path));
00238 
00239   my $asm_mapper = $self->{'_asm_mapper_cache'}->{$key};
00240 
00241   return $asm_mapper if($asm_mapper);
00242 
00243   if(@mapping_path == 1) {
00244     throw("Incorrect mapping path defined in meta table. " .
00245       "0 step mapping encountered between:\n" .
00246       $cs1->name() . " " . $cs1->version() . " and " . $cs2->name . " " .
00247       $cs2->version());
00248   }
00249 
00250   if(@mapping_path == 2) {
00251     #1 step regular mapping
00252     $asm_mapper = Bio::EnsEMBL::AssemblyMapper->new($self, @mapping_path);
00253 
00254 #   If you want multiple pieces on two seqRegions to map to each other
00255 #   you need to make an assembly.mapping entry that is seperated with a #
00256 #   instead of an |.
00257 
00258     $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper;
00259     return $asm_mapper;
00260   }
00261 
00262   if(@mapping_path == 3) {
00263    #two step chained mapping
00264     $asm_mapper = Bio::EnsEMBL::ChainedAssemblyMapper->new($self,@mapping_path);
00265     #in multi-step mapping it is possible get requests with the
00266     #coordinate system ordering reversed since both mappings directions
00267     #cache on both orderings just in case
00268     #e.g.   chr <-> contig <-> clone   and   clone <-> contig <-> chr
00269 
00270     $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper;
00271     $key = join(':', map({defined($_)?$_->dbID():"-"} reverse(@mapping_path)));
00272     $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper;
00273     return $asm_mapper;
00274   }
00275 
00276   throw("Only 1 and 2 step coordinate system mapping is currently\n" .
00277     "supported.  Mapping between " .
00278           $cs1->name() . " " . $cs1->version() . " and " .
00279           $cs2->name() . " " . $cs2->version() .
00280           " requires ". (scalar(@mapping_path)-1) . " steps.");
00281 }
00282 
00283 
00284 
00285 =head2 register_assembled
00286 
00287   Arg [1]    : Bio::EnsEMBL::AssemblyMapper $asm_mapper
00288                A valid AssemblyMapper object
00289   Arg [2]    : integer $asm_seq_region
00290                The dbID of the seq_region to be registered
00291   Arg [3]    : int $asm_start
00292                The start of the region to be registered
00293   Arg [4]    : int $asm_end
00294                The end of the region to be registered
00295   Description: Declares an assembled region to the AssemblyMapper.
00296                This extracts the relevant data from the assembly
00297                table and stores it in Mapper internal to the $asm_mapper.
00298                It therefore must be called before any mapping is
00299                attempted on that region. Otherwise only gaps will
00300                be returned.  Note that the AssemblyMapper automatically
00301                calls this method when the need arises.
00302   Returntype : none
00303   Exceptions : throw if the seq_region to be registered does not exist
00304                or if it associated with multiple assembled pieces (bad data
00305                in assembly table)
00306   Caller     : Bio::EnsEMBL::AssemblyMapper
00307   Status     : Stable
00308 
00309 =cut
00310 
00311 
00312 sub register_assembled {
00313   my $self = shift;
00314   my $asm_mapper = shift;
00315   my $asm_seq_region = shift;
00316   my $asm_start      = shift;
00317   my $asm_end        = shift;
00318 
00319   if(!ref($asm_mapper) || !$asm_mapper->isa('Bio::EnsEMBL::AssemblyMapper')) {
00320     throw("Bio::EnsEMBL::AssemblyMapper argument expected");
00321   }
00322 
00323   throw("asm_seq_region argument expected") if(!defined($asm_seq_region));
00324   throw("asm_start argument expected") if(!defined($asm_start));
00325   throw("asm_end argument expected") if(!defined($asm_end));
00326 
00327   my $asm_cs_id = $asm_mapper->assembled_CoordSystem->dbID();
00328   my $cmp_cs_id = $asm_mapper->component_CoordSystem->dbID();
00329 
00330   #split up the region to be registered into fixed chunks
00331   #this allows us to keep track of regions that have already been
00332   #registered and also works under the assumption that if a small region
00333   #is requested it is likely that other requests will be made in the
00334   #vicinity (the minimum size registered the chunksize (2^chunkfactor)
00335 
00336   my @chunk_regions;
00337   #determine span of chunks
00338   #bitwise shift right is fast and easy integer division
00339 
00340   my($start_chunk, $end_chunk);
00341 
00342   $start_chunk = $asm_start >> $CHUNKFACTOR;
00343   $end_chunk   = $asm_end   >> $CHUNKFACTOR;
00344 
00345   # inserts have start = end + 1, on boundary condition start_chunk
00346   # could be less than end chunk
00347   if($asm_start == $asm_end + 1) {
00348     ($start_chunk, $end_chunk) = ($end_chunk, $start_chunk);
00349   }
00350 
00351   #find regions of continuous unregistered chunks
00352   my $i;
00353   my ($begin_chunk_region,$end_chunk_region);
00354   for ($i = $start_chunk; $i <= $end_chunk; $i++) {
00355     if($asm_mapper->have_registered_assembled($asm_seq_region, $i)) {
00356       if(defined($begin_chunk_region)) {
00357         #this is the end of an unregistered region.
00358         my $region = [($begin_chunk_region   << $CHUNKFACTOR),
00359                       (($end_chunk_region+1)     << $CHUNKFACTOR)-1];
00360         push @chunk_regions, $region;
00361         $begin_chunk_region = $end_chunk_region = undef;
00362       }
00363     } else {
00364       $begin_chunk_region = $i if(!defined($begin_chunk_region));
00365       $end_chunk_region   = $i+1;
00366       $asm_mapper->register_assembled($asm_seq_region,$i);
00367     }
00368   }
00369 
00370   #the last part may have been an unregistered region too
00371   if(defined($begin_chunk_region)) {
00372     my $region = [($begin_chunk_region << $CHUNKFACTOR),
00373                   (($end_chunk_region+1)   << $CHUNKFACTOR) -1];
00374     push @chunk_regions, $region;
00375   }
00376 
00377   return if(!@chunk_regions);
00378 
00379   # keep the Mapper to a reasonable size
00380   if( $asm_mapper->size() > $asm_mapper->max_pair_count() ) {
00381     $asm_mapper->flush();
00382     #we now have to go and register the entire requested region since we
00383     #just flushed everything
00384 
00385     @chunk_regions = ( [ ( $start_chunk << $CHUNKFACTOR)
00386                          , (($end_chunk+1) << $CHUNKFACTOR)-1 ] );
00387 
00388     for( my $i = $start_chunk; $i <= $end_chunk; $i++ ) {
00389       $asm_mapper->register_assembled( $asm_seq_region, $i );
00390     }
00391   }
00392 
00393 #  my $asm_seq_region_id =
00394 #    $self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id);
00395 
00396   # Retrieve the description of how the assembled region is made from
00397   # component regions for each of the continuous blocks of unregistered,
00398   # chunked regions
00399 
00400   my $q = qq{
00401       SELECT
00402          asm.cmp_start,
00403          asm.cmp_end,
00404          asm.cmp_seq_region_id,
00405          sr.name,
00406          sr.length,
00407          asm.ori,
00408          asm.asm_start,
00409          asm.asm_end
00410       FROM
00411          assembly asm, seq_region sr
00412       WHERE
00413          asm.asm_seq_region_id = ? AND
00414          ? <= asm.asm_end AND
00415          ? >= asm.asm_start AND
00416          asm.cmp_seq_region_id = sr.seq_region_id AND
00417      sr.coord_system_id = ?
00418    };
00419 
00420   my $sth = $self->prepare($q);
00421 
00422   foreach my $region (@chunk_regions) {
00423     my($region_start, $region_end) = @$region;
00424     $sth->bind_param(1,$asm_seq_region,SQL_INTEGER);
00425     $sth->bind_param(2,$region_start,SQL_INTEGER);
00426     $sth->bind_param(3,$region_end,SQL_INTEGER);
00427     $sth->bind_param(4,$cmp_cs_id,SQL_INTEGER);
00428 
00429     $sth->execute();
00430 
00431     my($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $ori,
00432       $asm_start, $asm_end, $cmp_seq_region_length);
00433 
00434     $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id,
00435                        \$cmp_seq_region, \$cmp_seq_region_length, \$ori,
00436                        \$asm_start, \$asm_end);
00437 
00438     #
00439     # Load the unregistered regions of the mapper
00440     #
00441     while($sth->fetch()) {
00442       next if($asm_mapper->have_registered_component($cmp_seq_region_id)
00443                and !defined($self->{'multi_seq_ids'}->{$cmp_seq_region_id}));
00444       $asm_mapper->register_component($cmp_seq_region_id);
00445       $asm_mapper->mapper->add_map_coordinates(
00446                  $asm_seq_region, $asm_start, $asm_end,
00447                  $ori,
00448                  $cmp_seq_region_id, $cmp_start, $cmp_end);
00449 
00450       my $arr = [ $cmp_seq_region_id, $cmp_seq_region,
00451                   $cmp_cs_id, $cmp_seq_region_length ];
00452 
00453       $self->{'sr_name_cache'}->{"$cmp_seq_region:$cmp_cs_id"} = $arr;
00454       $self->{'sr_id_cache'}->{"$cmp_seq_region_id"} = $arr;
00455     }
00456   }
00457 
00458   $sth->finish();
00459 }
00460 
00461 
00462 
00463 sub _seq_region_name_to_id {
00464   my $self    = shift;
00465   my $sr_name = shift;
00466   my $cs_id   = shift;
00467 
00468   ($sr_name && $cs_id) || throw('seq_region_name and coord_system_id args ' .
00469                 'are required');
00470 
00471   my $arr = $self->{'sr_name_cache'}->{"$sr_name:$cs_id"};
00472   if( $arr ) {
00473     return $arr->[0];
00474   }
00475 
00476   # Get the seq_region_id via the name.  This would be quicker if we just
00477   # used internal ids instead but stored but then we lose the ability
00478   # the transform accross databases with different internal ids
00479 
00480   my $sth = $self->prepare("SELECT seq_region_id, length " .
00481                "FROM   seq_region " .
00482                "WHERE  name = ? AND coord_system_id = ?");
00483 
00484   $sth->bind_param(1,$sr_name,SQL_VARCHAR);
00485   $sth->bind_param(2,$cs_id,SQL_INTEGER);
00486   $sth->execute();
00487 
00488   if(!$sth->rows() == 1) {
00489     throw("Ambiguous or non-existant seq_region [$sr_name] " .
00490       "in coord system $cs_id");
00491   }
00492 
00493   my ($sr_id, $sr_length) = $sth->fetchrow_array();
00494   $sth->finish();
00495 
00496   $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
00497 
00498   $self->{'sr_name_cache'}->{"$sr_name:$cs_id"} = $arr;
00499   $self->{'sr_id_cache'}->{"$sr_id"} = $arr;
00500 
00501   return $sr_id;
00502 }
00503 
00504 sub _seq_region_id_to_name {
00505   my $self    = shift;
00506   my $sr_id = shift;
00507 
00508   ($sr_id) || throw('seq_region_is required');
00509 
00510   my $arr = $self->{'sr_id_cache'}->{"$sr_id"};
00511   if( $arr ) {
00512     return $arr->[1];
00513   }
00514 
00515   # Get the seq_region name via the id.  This would be quicker if we just
00516   # used internal ids instead but stored but then we lose the ability
00517   # the transform accross databases with different internal ids
00518 
00519   my $sth = $self->prepare("SELECT name, length ,coord_system_id " .
00520                "FROM   seq_region " .
00521                "WHERE  seq_region_id = ? ");
00522 
00523   $sth->bind_param(1,$sr_id,SQL_INTEGER);
00524   $sth->execute();
00525 
00526   if(!$sth->rows() == 1) {
00527     throw("non-existant seq_region [$sr_id]");
00528   }
00529 
00530   my ($sr_name, $sr_length, $cs_id) = $sth->fetchrow_array();
00531   $sth->finish();
00532 
00533   $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
00534 
00535   $self->{'sr_name_cache'}->{"$sr_name:$cs_id"} = $arr;
00536   $self->{'sr_id_cache'}->{"$sr_id"} = $arr;
00537 
00538   return $sr_name;
00539 }
00540 
00541 
00542 =head2 register_component
00543 
00544   Arg [1]    : Bio::EnsEMBL::AssemblyMapper $asm_mapper
00545                A valid AssemblyMapper object
00546   Arg [2]    : integer $cmp_seq_region
00547                The dbID of the seq_region to be registered
00548   Description: Declares a component region to the AssemblyMapper.
00549                This extracts the relevant data from the assembly
00550                table and stores it in Mapper internal to the $asm_mapper.
00551                It therefore must be called before any mapping is
00552                attempted on that region. Otherwise only gaps will
00553                be returned.  Note that the AssemblyMapper automatically
00554                calls this method when the need arises.
00555   Returntype : none
00556   Exceptions : throw if the seq_region to be registered does not exist
00557                or if it associated with multiple assembled pieces (bad data
00558                in assembly table)
00559   Caller     : Bio::EnsEMBL::AssemblyMapper
00560   Status     : Stable
00561 
00562 =cut
00563 
00564 sub register_component {
00565   my $self = shift;
00566   my $asm_mapper = shift;
00567   my $cmp_seq_region = shift;
00568 
00569   if(!ref($asm_mapper) || !$asm_mapper->isa('Bio::EnsEMBL::AssemblyMapper')) {
00570     throw("Bio::EnsEMBL::AssemblyMapper argument expected");
00571   }
00572 
00573   if(!defined($cmp_seq_region)) {
00574     throw("cmp_seq_region argument expected");
00575   }
00576 
00577   my $cmp_cs_id = $asm_mapper->component_CoordSystem()->dbID();
00578   my $asm_cs_id = $asm_mapper->assembled_CoordSystem()->dbID();
00579 
00580   #do nothing if this region is already registered or special case
00581   return if($asm_mapper->have_registered_component($cmp_seq_region)
00582   and !defined($self->{'multi_seq_ids'}->{$cmp_seq_region}));
00583 
00584 #  my $cmp_seq_region_id =
00585 #    $self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id);
00586 
00587   # Determine what part of the assembled region this component region makes up
00588 
00589   my $q = qq{
00590       SELECT
00591          asm.asm_start,
00592          asm.asm_end,
00593          asm.asm_seq_region_id,
00594          sr.name,
00595          sr.length
00596       FROM
00597          assembly asm, seq_region sr
00598       WHERE
00599          asm.cmp_seq_region_id = ? AND
00600          asm.asm_seq_region_id = sr.seq_region_id AND
00601          sr.coord_system_id = ?
00602    };
00603 
00604   my $sth = $self->prepare($q);
00605   $sth->bind_param(1,$cmp_seq_region,SQL_INTEGER);
00606   $sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
00607   $sth->execute();
00608 
00609   if($sth->rows() == 0) {
00610     #this component is not used in the assembled part i.e. gap
00611     $asm_mapper->register_component($cmp_seq_region);
00612     return;
00613   }
00614 
00615   #we do not currently support components mapping to multiple assembled
00616   # make sure that you've got the correct mapping in the meta-table :
00617   #   chromosome:EquCab2#contig ( use'#' for multiple mappings )
00618   #   chromosome:EquCab2|contig ( use '|' delimiter for 1-1 mappings )
00619   #
00620   if($sth->rows() != 1) {
00621     throw("Multiple assembled regions for single " .
00622           "component region cmp_seq_region_id=[$cmp_seq_region]\n".
00623           "Remember that multiple mappings use the \#-operaator".
00624           " in the meta-table (i.e. chromosome:EquCab2\#contig\n");
00625   }
00626 
00627   my ($asm_start, $asm_end, $asm_seq_region_id,
00628       $asm_seq_region, $asm_seq_region_length) = $sth->fetchrow_array();
00629 
00630   my $arr = [ $asm_seq_region_id, $asm_seq_region,
00631               $asm_cs_id, $asm_seq_region_length ];
00632 
00633   $self->{'sr_name_cache'}->{"$asm_seq_region:$asm_cs_id"} = $arr;
00634   $self->{'sr_id_cache'}->{"$asm_seq_region_id"} = $arr;
00635 
00636   $sth->finish();
00637 
00638   # Register the corresponding assembled region. This allows a us to
00639   # register things in assembled chunks which allows us to:
00640   # (1) Keep track of what assembled regions are registered
00641   # (2) Use locality of reference (if they want something in same general
00642   #     region it will already be registered).
00643 
00644   $self->register_assembled($asm_mapper,$asm_seq_region_id,$asm_start,$asm_end);
00645 }
00646 
00647 
00648 
00649 =head2 register_chained
00650 
00651   Arg [1]    : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper
00652                The chained assembly mapper to register regions on
00653   Arg [2]    : string $from ('first' or 'last')
00654                The direction we are registering from, and the name of the
00655                internal mapper.
00656   Arg [3]    : string $seq_region_name
00657                The name of the seqregion we are registering on
00658   Arg [4]    : listref $ranges
00659                A list  of ranges to register (in [$start,$end] tuples).
00660   Arg [5]    : (optional) $to_slice
00661                Only register those on this Slice.
00662   Description: Registers a set of ranges on a chained assembly mapper.
00663                This function is at the heart of the chained mapping process.
00664                It retrieves information from the assembly table and
00665                dynamically constructs the mappings between two coordinate
00666                systems which are 2 mapping steps apart. It does this by using
00667                two internal mappers to load up a third mapper which is
00668                actually used by the ChainedAssemblyMapper to perform the
00669                mapping.
00670 
00671                This method must be called before any mapping is
00672                attempted on regions of interest, otherwise only gaps will
00673                be returned.  Note that the ChainedAssemblyMapper automatically
00674                calls this method when the need arises.
00675   Returntype : none
00676   Exceptions : throw if the seq_region to be registered does not exist
00677                or if it associated with multiple assembled pieces (bad data
00678                in assembly table)
00679 
00680                throw if the mapping between the coordinate systems cannot
00681                be performed in two steps, which means there is an internal
00682                error in the data in the meta table or in the code that creates
00683                the mapping paths.
00684   Caller     : Bio::EnsEMBL::AssemblyMapper
00685   Status     : Stable
00686 
00687 =cut
00688 
00689 sub register_chained {
00690   my $self = shift;
00691   my $casm_mapper = shift;
00692   my $from = shift;
00693   my $seq_region_id = shift;
00694   my $ranges = shift;
00695   my $to_slice = shift;
00696 
00697   my $to_seq_region_id;
00698   if(defined($to_slice)){
00699    if($casm_mapper->first_CoordSystem()->equals($casm_mapper->last_CoordSystem())){
00700       return $self->_register_chained_special($casm_mapper, $from, $seq_region_id, $ranges, $to_slice);
00701     }
00702     $to_seq_region_id = $to_slice->get_seq_region_id();
00703     if(!defined($to_seq_region_id)){
00704       die "Could not get seq_region_id for to_slice".$to_slice->seq_region_name."\n";
00705     }
00706   }
00707 
00708   my ($start_name, $start_mid_mapper, $start_cs, $start_registry,
00709       $end_name, $end_mid_mapper, $end_cs, $end_registry);
00710 
00711   if($from eq 'first') {
00712     $start_name       = 'first';
00713     $start_mid_mapper = $casm_mapper->first_middle_mapper();
00714     $start_cs         = $casm_mapper->first_CoordSystem();
00715     $start_registry   = $casm_mapper->first_registry();
00716     $end_mid_mapper   = $casm_mapper->last_middle_mapper();
00717     $end_cs           = $casm_mapper->last_CoordSystem();
00718     $end_registry     = $casm_mapper->last_registry();
00719     $end_name         = 'last';
00720   } elsif($from eq 'last') {
00721     $start_name       = 'last';
00722     $start_mid_mapper = $casm_mapper->last_middle_mapper();
00723     $start_cs         = $casm_mapper->last_CoordSystem();
00724     $start_registry   = $casm_mapper->last_registry();
00725     $end_mid_mapper   = $casm_mapper->first_middle_mapper();
00726     $end_cs           = $casm_mapper->first_CoordSystem();
00727     $end_registry     = $casm_mapper->first_registry();
00728     $end_name         = 'first';
00729   } else {
00730     throw("Invalid from argument: [$from], must be 'first' or 'last'");
00731   }
00732 
00733   my $combined_mapper = $casm_mapper->first_last_mapper();
00734   my $mid_cs          = $casm_mapper->middle_CoordSystem();
00735   my $mid_name        = 'middle';
00736   my $csa             = $self->db->get_CoordSystemAdaptor();
00737 
00738   # Check for the simple case where the ChainedMapper is short
00739   if( ! defined $mid_cs ) {
00740       $start_mid_mapper = $combined_mapper;
00741   }
00742 
00743 
00744   ##############
00745   # obtain the first half of the mappings and load them into the start mapper
00746   #
00747 
00748   #ascertain which is component and which is actually assembled coord system
00749   my @path;
00750 
00751   # check for the simple case, where the ChainedMapper is short
00752   if( defined $mid_cs ) {
00753       @path = @{$csa->get_mapping_path($start_cs, $mid_cs)};
00754   } else {
00755       @path = @{$csa->get_mapping_path( $start_cs, $end_cs )};
00756   }
00757 
00758   if(@path != 2 && defined( $path[1] )) {
00759     my $path = join(',', map({$_->name .' '. $_->version} @path));
00760     my $len  = scalar(@path) - 1;
00761     throw("Unexpected mapping path between start and intermediate " .
00762       "coord systems (". $start_cs->name . " " . $start_cs->version .
00763       " and " . $mid_cs->name . " " . $mid_cs->version . ")." .
00764       "\nExpected path length 1, got $len. " .
00765       "(path=$path)");
00766   }
00767 
00768   my $sth;
00769   my ($asm_cs,$cmp_cs);
00770   $asm_cs = $path[0];
00771   $cmp_cs = $path[-1];
00772 
00773   #the SQL varies depending on whether we are coming from assembled or
00774   #component coordinate system
00775 
00776 my $asm2cmp = (<<ASMCMP);
00777    SELECT
00778          asm.cmp_start,
00779          asm.cmp_end,
00780          asm.cmp_seq_region_id,
00781          sr.name,
00782          sr.length,
00783          asm.ori,
00784          asm.asm_start,
00785          asm.asm_end
00786       FROM
00787          assembly asm, seq_region sr
00788       WHERE
00789          asm.asm_seq_region_id = ? AND
00790          ? <= asm.asm_end AND
00791          ? >= asm.asm_start AND
00792          asm.cmp_seq_region_id = sr.seq_region_id AND
00793      sr.coord_system_id = ?
00794 ASMCMP
00795 
00796 
00797 my $cmp2asm = (<<CMPASM);
00798    SELECT
00799          asm.asm_start,
00800          asm.asm_end,
00801          asm.asm_seq_region_id,
00802          sr.name,
00803          sr.length,
00804          asm.ori,
00805          asm.cmp_start,
00806          asm.cmp_end
00807       FROM
00808          assembly asm, seq_region sr
00809       WHERE
00810          asm.cmp_seq_region_id = ? AND
00811          ? <= asm.cmp_end AND
00812          ? >= asm.cmp_start AND
00813          asm.asm_seq_region_id = sr.seq_region_id AND
00814      sr.coord_system_id = ?
00815 CMPASM
00816 
00817   my $asm2cmp_sth;
00818   my $cmp2asm_sth;
00819   if(defined($to_slice)){
00820     my $to_cs = $to_slice->coord_system;
00821     if($asm_cs->equals($to_cs)){
00822       $asm2cmp_sth = $self->prepare($asm2cmp);
00823       $cmp2asm_sth = $self->prepare($cmp2asm." AND asm.asm_seq_region_id = $to_seq_region_id");
00824     }
00825     elsif($cmp_cs->equals($to_cs)){
00826       $asm2cmp_sth = $self->prepare($asm2cmp." AND asm.cmp_seq_region_id = $to_seq_region_id");
00827       $cmp2asm_sth = $self->prepare($cmp2asm);
00828     }
00829     else{
00830       $asm2cmp_sth = $self->prepare($asm2cmp);
00831       $cmp2asm_sth = $self->prepare($cmp2asm);
00832     }
00833   } 
00834   else{
00835     $asm2cmp_sth = $self->prepare($asm2cmp);
00836     $cmp2asm_sth = $self->prepare($cmp2asm);
00837   }
00838 
00839 
00840 
00841   $sth = ($asm_cs->equals($start_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
00842 
00843   my $mid_cs_id;
00844 
00845   # check for the simple case where the ChainedMapper is short
00846   if( defined $mid_cs ) {
00847       $mid_cs_id = $mid_cs->dbID();
00848   } else {
00849       $mid_cs_id = $end_cs->dbID();
00850   }
00851 
00852   my @mid_ranges;
00853   my @start_ranges;
00854 
00855   #need to perform the query for each unregistered range
00856   foreach my $range (@$ranges) {
00857     my ($start, $end) = @$range;
00858     $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
00859     $sth->bind_param(2,$start,SQL_INTEGER);
00860     $sth->bind_param(3,$end,SQL_INTEGER);
00861     $sth->bind_param(4,$mid_cs_id,SQL_INTEGER);
00862     $sth->execute();
00863 
00864     #load the start <-> mid mapper with the results and record the mid cs
00865     #ranges we just added to the mapper
00866 
00867     my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
00868         $ori, $start_start, $start_end);
00869 
00870     $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
00871                \$mid_seq_region, \$mid_length, \$ori, \$start_start,
00872                \$start_end);
00873 
00874     while($sth->fetch()) {
00875 
00876       if( defined $mid_cs ) {
00877         $start_mid_mapper->add_map_coordinates
00878           (
00879            $seq_region_id,$start_start, $start_end, $ori,
00880            $mid_seq_region_id, $mid_start, $mid_end
00881           );
00882       } else {
00883         if( $from eq "first" ) {
00884           $combined_mapper->add_map_coordinates
00885             (
00886              $seq_region_id,$start_start, $start_end, $ori,
00887              $mid_seq_region_id, $mid_start, $mid_end
00888             );
00889         } else {
00890           $combined_mapper->add_map_coordinates
00891             (
00892              $mid_seq_region_id, $mid_start, $mid_end, $ori,
00893              $seq_region_id,$start_start, $start_end
00894             );
00895         }
00896       }
00897 
00898       #update sr_name cache
00899       my $arr = [ $mid_seq_region_id, $mid_seq_region,
00900                   $mid_cs_id, $mid_length ];
00901 
00902       $self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr;
00903       $self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr;
00904 
00905       push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
00906                         $mid_start,$mid_end];
00907       push @start_ranges, [ $seq_region_id, $start_start, $start_end ];
00908 
00909       #the region that we actually register may actually be larger or smaller
00910       #than the region that we wanted to register.
00911       #register the intersection of the region so we do not end up doing
00912       #extra work later
00913 
00914       if($start_start < $start || $start_end > $end) {
00915         $start_registry->check_and_register($seq_region_id,$start_start,
00916                                             $start_end);
00917       }
00918     }
00919     $sth->finish();
00920   }
00921 
00922   # in the one step case, we load the mid ranges in the
00923   # last_registry and we are done
00924   if( ! defined $mid_cs ) {
00925     for my $range ( @mid_ranges ) {
00926       $end_registry->check_and_register( $range->[0], $range->[2],
00927                                          $range->[3] );
00928     }
00929 
00930     # and thats it for the simple case ...
00931     return;
00932   }
00933 
00934 
00935   ###########
00936   # now the second half of the mapping
00937   # perform another query and load the mid <-> end mapper using the mid cs
00938   # ranges
00939   #
00940 
00941   #ascertain which is component and which is actually assembled coord system
00942   @path = @{$csa->get_mapping_path($mid_cs, $end_cs)};
00943   if(@path == 2 || ( @path == 3 && !defined $path[1])) {
00944 
00945     $asm_cs = $path[0];
00946     $cmp_cs = $path[-1];
00947   } else {
00948     my $path = join(',', map({$_->name .' '. $_->version} @path));
00949     my $len = scalar(@path)-1;
00950     throw("Unexpected mapping path between intermediate and last" .
00951       "coord systems (". $mid_cs->name . " " . $mid_cs->version .
00952       " and " . $end_cs->name . " " . $end_cs->version . ")." .
00953       "\nExpected path length 1, got $len. " .
00954       "(path=$path)");
00955   }
00956 
00957   if(defined($to_slice)){
00958     my $to_cs = $to_slice->coord_system;
00959     if($asm_cs->equals($to_cs)){
00960       $asm2cmp_sth = $self->prepare($asm2cmp);
00961       $cmp2asm_sth = $self->prepare($cmp2asm." AND asm.asm_seq_region_id = $to_seq_region_id");
00962     }
00963     elsif($cmp_cs->equals($to_cs)){
00964       $asm2cmp_sth = $self->prepare($asm2cmp." AND asm.cmp_seq_region_id = $to_seq_region_id");
00965       $cmp2asm_sth = $self->prepare($cmp2asm);
00966     }
00967     else{
00968       $asm2cmp_sth = $self->prepare($asm2cmp);
00969       $cmp2asm_sth = $self->prepare($cmp2asm);
00970     }
00971   }
00972 
00973   $sth = ($asm_cs->equals($mid_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
00974 
00975   my $end_cs_id = $end_cs->dbID();
00976   foreach my $mid_range (@mid_ranges) {
00977     my ($mid_seq_region_id, $mid_seq_region,$start, $end) = @$mid_range;
00978     $sth->bind_param(1,$mid_seq_region_id,SQL_INTEGER);
00979     $sth->bind_param(2,$start,SQL_INTEGER);
00980     $sth->bind_param(3,$end,SQL_INTEGER);
00981     $sth->bind_param(4,$end_cs_id,SQL_INTEGER);
00982     $sth->execute();
00983 
00984     #load the end <-> mid mapper with the results and record the mid cs
00985     #ranges we just added to the mapper
00986 
00987     my ($end_start, $end_end, $end_seq_region_id, $end_seq_region, $end_length,
00988         $ori, $mid_start, $mid_end);
00989 
00990     $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id,
00991                \$end_seq_region, \$end_length, \$ori, \$mid_start,
00992                \$mid_end);
00993 
00994     while($sth->fetch()) {
00995       $end_mid_mapper->add_map_coordinates
00996         (
00997          $end_seq_region_id, $end_start, $end_end, $ori,
00998          $mid_seq_region_id, $mid_start, $mid_end
00999         );
01000 
01001       #update sr_name cache
01002       my $arr = [ $end_seq_region_id,$end_seq_region,$end_cs_id,$end_length ];
01003 
01004       $self->{'sr_name_cache'}->{"$end_seq_region:$end_cs_id"} = $arr;
01005       $self->{'sr_id_cache'}->{"$end_seq_region_id"} = $arr;
01006 
01007       #register this region on the end coord system
01008       $end_registry->check_and_register($end_seq_region_id, $end_start, $end_end);
01009     }
01010     $sth->finish();
01011   }
01012 
01013   #########
01014   # Now that both halves are loaded
01015   # Do stepwise mapping using both of the loaded mappers to load
01016   # the final start <-> end mapper
01017   #
01018 
01019   _build_combined_mapper(\@start_ranges, $start_mid_mapper, $end_mid_mapper,
01020                          $combined_mapper, $start_name);
01021   #all done!
01022   return;
01023 }
01024 
01025 
01026 =head2 _register_chained_special
01027 
01028   Arg [1]    : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper
01029                The chained assembly mapper to register regions on
01030   Arg [2]    : string $from ('first' or 'last')
01031                The direction we are registering from, and the name of the
01032                internal mapper.
01033   Arg [3]    : string $seq_region_name
01034                The name of the seqregion we are registering on
01035   Arg [4]    : listref $ranges
01036                A list  of ranges to register (in [$start,$end] tuples).
01037   Arg [5]    : (optional) $to_slice
01038                Only register those on this Slice.
01039   Description: Registers a set of ranges on a chained assembly mapper.
01040                This function is at the heart of the chained mapping process.
01041                It retrieves information from the assembly table and
01042                dynamically constructs the mappings between two coordinate
01043                systems which are 2 mapping steps apart. It does this by using
01044                two internal mappers to load up a third mapper which is
01045                actually used by the ChainedAssemblyMapper to perform the
01046                mapping.
01047 
01048                This method must be called before any mapping is
01049                attempted on regions of interest, otherwise only gaps will
01050                be returned.  Note that the ChainedAssemblyMapper automatically
01051                calls this method when the need arises.
01052   Returntype : none
01053   Exceptions : throw if the seq_region to be registered does not exist
01054                or if it associated with multiple assembled pieces (bad data
01055                in assembly table)
01056 
01057                throw if the mapping between the coordinate systems cannot
01058                be performed in two steps, which means there is an internal
01059                error in the data in the meta table or in the code that creates
01060                the mapping paths.
01061   Caller     : Bio::EnsEMBL::AssemblyMapper
01062   Status     : Stable
01063 
01064 =cut
01065 
01066 sub _register_chained_special {
01067   my $self = shift;
01068   my $casm_mapper = shift;
01069   my $from = shift;
01070   my $seq_region_id = shift;
01071   my $ranges = shift;
01072   my $to_slice = shift;
01073   my $found = 0;
01074 
01075   my $sth = $self->prepare("SELECT
01076               asm.cmp_start,
01077               asm.cmp_end,
01078               asm.cmp_seq_region_id,
01079               sr.name,
01080               sr.length,
01081               asm.ori,
01082               asm.asm_start,
01083               asm.asm_end
01084             FROM
01085               assembly asm, seq_region sr
01086             WHERE
01087               asm.asm_seq_region_id = ? AND
01088                       ? <= asm.asm_end AND
01089               ? >= asm.asm_start AND
01090               asm.cmp_seq_region_id = sr.seq_region_id AND
01091               sr.coord_system_id = ? AND
01092               asm.cmp_seq_region_id = ?");
01093 
01094 
01095   my ($start_name, $start_mid_mapper, $start_cs, $start_registry,
01096       $end_name, $end_mid_mapper, $end_cs, $end_registry);
01097 
01098   if($from eq 'first') {
01099     $start_name       = 'first';
01100     $start_mid_mapper = $casm_mapper->first_middle_mapper();
01101     $start_cs         = $casm_mapper->first_CoordSystem();
01102     $start_registry   = $casm_mapper->first_registry();
01103     $end_mid_mapper   = $casm_mapper->last_middle_mapper();
01104     $end_cs           = $casm_mapper->last_CoordSystem();
01105     $end_registry     = $casm_mapper->last_registry();
01106     $end_name         = 'last';
01107   } elsif($from eq 'last') {
01108     $start_name       = 'last';
01109     $start_mid_mapper = $casm_mapper->last_middle_mapper();
01110     $start_cs         = $casm_mapper->last_CoordSystem();
01111     $start_registry   = $casm_mapper->last_registry();
01112     $end_mid_mapper   = $casm_mapper->first_middle_mapper();
01113     $end_cs           = $casm_mapper->first_CoordSystem();
01114     $end_registry     = $casm_mapper->first_registry();
01115     $end_name         = 'first';
01116   } else {
01117     throw("Invalid from argument: [$from], must be 'first' or 'last'");
01118   }
01119 
01120   my $combined_mapper = $casm_mapper->first_last_mapper();
01121   my $mid_cs          = $casm_mapper->middle_CoordSystem();
01122   my $mid_name        = 'middle';
01123   my $csa             = $self->db->get_CoordSystemAdaptor();
01124 
01125   # Check for the simple case where the ChainedMapper is short
01126   if( ! defined $mid_cs ) {
01127       $start_mid_mapper = $combined_mapper;
01128   }
01129 
01130 
01131   my @path;
01132   if( defined $mid_cs ) {
01133       @path = @{$csa->get_mapping_path($start_cs, $mid_cs)};
01134   } else {
01135       @path = @{$csa->get_mapping_path( $start_cs, $end_cs )};
01136   }
01137   if( ! defined $mid_cs ) {
01138       $start_mid_mapper = $combined_mapper;
01139   }
01140 
01141   if(@path != 2 && defined( $path[1] )) {
01142     my $path = join(',', map({$_->name .' '. $_->version} @path));
01143     my $len  = scalar(@path) - 1;
01144     throw("Unexpected mapping path between start and intermediate " .
01145       "coord systems (". $start_cs->name . " " . $start_cs->version .
01146       " and " . $mid_cs->name . " " . $mid_cs->version . ")." .
01147       "\nExpected path length 1, got $len. " .
01148       "(path=$path)");
01149   }
01150 
01151   my ($asm_cs,$cmp_cs);
01152   $asm_cs = $path[0];
01153   $cmp_cs = $path[-1];
01154 
01155   $combined_mapper = $casm_mapper->first_last_mapper();
01156   $mid_cs          = $casm_mapper->middle_CoordSystem();
01157   $mid_name        = 'middle';
01158   $csa             = $self->db->get_CoordSystemAdaptor();
01159 
01160   my $mid_cs_id;
01161 
01162   # Check for the simple case where the ChainedMapper is short
01163   if ( !defined $mid_cs ) {
01164     $start_mid_mapper = $combined_mapper;
01165   } else {
01166     $mid_cs_id = $mid_cs->dbID();
01167   }
01168 
01169   my @mid_ranges;
01170   my @start_ranges;
01171 
01172   my $to_cs = $to_slice->coord_system;
01173   foreach my $direction (1, 0){
01174     my $id1;
01175     my $id2;
01176     if($direction){
01177       $id1 = $seq_region_id;
01178       $id2 = $to_slice->get_seq_region_id();
01179     }
01180     else{
01181       $id2 = $seq_region_id;
01182       $id1 = $to_slice->get_seq_region_id();
01183     }
01184 
01185     foreach my $range (@$ranges) {
01186       my ($start, $end) = @$range;
01187       $sth->bind_param(1,$id1,SQL_INTEGER);
01188       $sth->bind_param(2,$start,SQL_INTEGER);
01189       $sth->bind_param(3,$end,SQL_INTEGER);
01190       $sth->bind_param(4,$to_cs->dbID,SQL_INTEGER);
01191       $sth->bind_param(5,$id2,SQL_INTEGER);
01192       $sth->execute();
01193 
01194       my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
01195       $ori, $start_start, $start_end);
01196 
01197       $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
01198              \$mid_seq_region, \$mid_length, \$ori, \$start_start,
01199              \$start_end);
01200 
01201       while($sth->fetch()) {
01202     $found = 1;
01203 
01204     if( defined $mid_cs ) {
01205       $start_mid_mapper->add_map_coordinates
01206         (
01207          $id1,$start_start, $start_end, $ori,
01208          $mid_seq_region_id, $mid_start, $mid_end
01209         );
01210     } else {
01211       if( $from eq "first") {
01212         if($direction){
01213           $combined_mapper->add_map_coordinates
01214         (
01215          $id1,$start_start, $start_end, $ori,
01216          $mid_seq_region_id, $mid_start, $mid_end
01217         );
01218         }
01219         else{
01220           $combined_mapper->add_map_coordinates
01221         (
01222          $mid_seq_region_id, $mid_start, $mid_end, $ori,
01223          $id1,$start_start, $start_end
01224         );
01225         }   
01226       } else {
01227         if($direction){
01228           $combined_mapper->add_map_coordinates
01229         (
01230          $mid_seq_region_id, $mid_start, $mid_end, $ori,
01231          $id1,$start_start, $start_end
01232         );
01233         }
01234         else{
01235           $combined_mapper->add_map_coordinates
01236         (
01237          $id1,$start_start, $start_end, $ori,
01238          $mid_seq_region_id, $mid_start, $mid_end
01239         );
01240         }
01241       }
01242     }
01243     
01244     #update sr_name cache
01245     my $arr = [ $mid_seq_region_id, $mid_seq_region,
01246             $mid_cs_id, $mid_length ];
01247     
01248     $self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr;
01249     $self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr;
01250     
01251     push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
01252               $mid_start,$mid_end];
01253     push @start_ranges, [ $id1, $start_start, $start_end ];
01254     
01255     #the region that we actually register may actually be larger or smaller
01256     #than the region that we wanted to register.
01257     #register the intersection of the region so we do not end up doing
01258     #extra work later
01259     
01260     if($start_start < $start || $start_end > $end) {
01261       $start_registry->check_and_register($id1,$start_start,
01262                           $start_end);
01263     }
01264       }
01265     $sth->finish();
01266     }
01267     if($found){
01268       if( ! defined $mid_cs ) {
01269     for my $range ( @mid_ranges ) {
01270       $end_registry->check_and_register( $range->[0], $range->[2],
01271                          $range->[3] );
01272     }
01273     
01274     # and thats it for the simple case ...
01275     return;
01276       }
01277     }
01278   }
01279 }
01280 
01281 
01282 =head2 register_all
01283 
01284   Arg [1]    : Bio::EnsEMBL::AssemblyMapper $mapper
01285   Example    : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2);
01286 
01287                # make cache large enough to hold all of the mappings
01288                $mapper->max_pair_count(10e6);
01289                $asm_mapper_adaptor->register_all($mapper);
01290 
01291                # perform mappings as normal
01292                $mapper->map($slice->seq_region_name(), $sr_start, $sr_end,
01293                             $sr_strand, $cs1);
01294                ...
01295   Description: This function registers the entire set of mappings between
01296                two coordinate systems in an assembly mapper.
01297                This will use a lot of memory but will be much more efficient
01298                when doing a lot of mapping which is spread over the entire
01299                genome.
01300   Returntype : none
01301   Exceptions : none
01302   Caller     : specialised prograhsm
01303   Status     : Stable
01304 
01305 =cut
01306 
01307 sub register_all {
01308   my $self = shift;
01309   my $mapper = shift;
01310 
01311   my $asm_cs_id = $mapper->assembled_CoordSystem()->dbID();
01312   my $cmp_cs_id = $mapper->component_CoordSystem()->dbID();
01313 
01314   # retrieve every relevant assembled/component pair from the assembly table
01315 
01316   my $q = qq{
01317       SELECT
01318          asm.cmp_start,
01319          asm.cmp_end,
01320          asm.cmp_seq_region_id,
01321          cmp_sr.name,
01322          cmp_sr.length,
01323          asm.ori,
01324          asm.asm_start,
01325          asm.asm_end,
01326          asm.asm_seq_region_id,
01327          asm_sr.name,
01328          asm_sr.length
01329       FROM
01330          assembly asm, seq_region asm_sr, seq_region cmp_sr
01331       WHERE
01332          asm.cmp_seq_region_id = cmp_sr.seq_region_id AND
01333          asm.asm_seq_region_id = asm_sr.seq_region_id AND
01334          cmp_sr.coord_system_id = ? AND
01335          asm_sr.coord_system_id = ?
01336    };
01337 
01338   my $sth = $self->prepare($q);
01339 
01340   $sth->bind_param(1,$cmp_cs_id,SQL_INTEGER);
01341   $sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
01342   $sth->execute();
01343 
01344   # load the mapper with the assembly information
01345 
01346   my ($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $cmp_length,
01347       $ori,
01348       $asm_start, $asm_end, $asm_seq_region_id, $asm_seq_region, $asm_length);
01349 
01350   $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id,
01351                      \$cmp_seq_region, \$cmp_length, \$ori,
01352                      \$asm_start, \$asm_end, \$asm_seq_region_id,
01353                      \$asm_seq_region, \$asm_length);
01354 
01355   my %asm_registered;
01356 
01357   while($sth->fetch()) {
01358     $mapper->register_component($cmp_seq_region_id);
01359     $mapper->mapper->add_map_coordinates(
01360                  $asm_seq_region_id, $asm_start, $asm_end,
01361                  $ori,
01362                  $cmp_seq_region_id, $cmp_start, $cmp_end);
01363 
01364       my $arr = [$cmp_seq_region_id, $cmp_seq_region, $cmp_cs_id, $cmp_length];
01365 
01366       $self->{'sr_name_cache'}->{"$cmp_seq_region:$cmp_cs_id"} = $arr;
01367       $self->{'sr_id_cache'}->{"$cmp_seq_region_id"} = $arr;
01368 
01369     # only register each asm seq_region once since it requires some work
01370     if(!$asm_registered{$asm_seq_region_id}) {
01371       $asm_registered{$asm_seq_region_id} = 1;
01372 
01373       # register all chunks from start of seq region to end
01374       my $end_chunk = $asm_length >> $CHUNKFACTOR;
01375       for(my $i = 0; $i <= $end_chunk; $i++) {
01376         $mapper->register_assembled($asm_seq_region_id, $i);
01377       }
01378 
01379       $arr = [$asm_seq_region_id, $asm_seq_region, $asm_cs_id, $asm_length];
01380 
01381       $self->{'sr_name_cache'}->{"$asm_seq_region:$asm_cs_id"} = $arr;
01382       $self->{'sr_id_cache'}->{"$asm_seq_region_id"} = $arr;
01383     }
01384   }
01385 
01386   $sth->finish();
01387 
01388   return;
01389 }
01390 
01391 
01392 
01393 =head2 register_all_chained
01394 
01395   Arg [1]    : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper
01396   Example    : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2);
01397 
01398                # make the cache large enough to hold all of the mappings
01399                $mapper->max_pair_count(10e6);
01400                # load all of the mapping data
01401                $asm_mapper_adaptor->register_all_chained($mapper);
01402 
01403                # perform mappings as normal
01404                $mapper->map($slice->seq_region_name(), $sr_start, $sr_end,
01405                             $sr_strand, $cs1);
01406                ...
01407   Description: This function registers the entire set of mappings between
01408                two coordinate systems in a chained mapper.  This will use a lot
01409                of memory but will be much more efficient when doing a lot of
01410                mapping which is spread over the entire genome.
01411   Returntype : none
01412   Exceptions : throw if mapper is between coord systems with unexpected
01413                mapping paths
01414   Caller     : specialised programs doing a lot of genome-wide mapping
01415   Status     : Stable
01416 
01417 =cut
01418 
01419 sub register_all_chained {
01420   my $self = shift;
01421   my $casm_mapper = shift;
01422 
01423   my $first_cs = $casm_mapper->first_CoordSystem();
01424   my $mid_cs   = $casm_mapper->middle_CoordSystem();
01425   my $last_cs  = $casm_mapper->last_CoordSystem();
01426 
01427   my $start_mid_mapper = $casm_mapper->first_middle_mapper();
01428   my $end_mid_mapper   = $casm_mapper->last_middle_mapper();
01429   my $combined_mapper  = $casm_mapper->first_last_mapper();
01430 
01431   my @ranges;
01432 
01433   my $sth = $self->prepare(
01434      'SELECT
01435          asm.cmp_start,
01436          asm.cmp_end,
01437          asm.cmp_seq_region_id,
01438          sr_cmp.name,
01439          sr_cmp.length,
01440          asm.ori,
01441          asm.asm_start,
01442          asm.asm_end,
01443          asm.asm_seq_region_id,
01444          sr_asm.name,
01445          sr_asm.length
01446       FROM
01447          assembly asm, seq_region sr_asm, seq_region sr_cmp
01448       WHERE
01449          sr_asm.seq_region_id = asm.asm_seq_region_id AND
01450          sr_cmp.seq_region_id = asm.cmp_seq_region_id AND
01451          sr_asm.coord_system_id = ? AND
01452          sr_cmp.coord_system_id = ?');
01453 
01454   my $csa = $self->db()->get_CoordSystemAdaptor();
01455 
01456   my @path;
01457 
01458   if ( !defined $mid_cs ) {
01459     @path = @{ $csa->get_mapping_path( $first_cs, $last_cs ) };
01460     if ( !defined( $path[1] ) ) {
01461       splice( @path, 1, 1 );
01462     }
01463   } else {
01464     @path = @{ $csa->get_mapping_path( $first_cs, $mid_cs ) };
01465     # fix for when we have something like supercontig#contig#chromosome
01466     if ( !defined( $path[1] ) ) {
01467       splice( @path, 1, 1 );
01468     }
01469   }
01470 
01471   if ( @path != 2 ) {
01472     my $path =
01473       join( ',', map( { $_->name . ' ' . $_->version } @path ) );
01474     my $len = scalar(@path) - 1;
01475     throw(   "Unexpected mapping path between start and intermediate "
01476            . "coord systems ("
01477            . $first_cs->name . " "
01478            . $first_cs->version . " and "
01479            . $mid_cs->name . " "
01480            . $mid_cs->version . ")."
01481            . "\nExpected path length 1, got $len. "
01482            . "(path=$path)" );
01483   }
01484 
01485   my ($asm_cs,$cmp_cs) = @path;
01486 
01487   $sth->{mysql_use_result} = 1;
01488   $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER);
01489   $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER);
01490   $sth->execute();
01491 
01492 
01493   my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
01494       $ori, $start_start, $start_end, $start_seq_region_id, $start_seq_region,
01495       $start_length);
01496 
01497   if($asm_cs->equals($first_cs)) {
01498     $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
01499                        \$mid_seq_region, \$mid_length, \$ori, \$start_start,
01500                        \$start_end, \$start_seq_region_id, \$start_seq_region,
01501                        \$start_length);
01502   } else {
01503     $sth->bind_columns(\$start_start, \$start_end, \$start_seq_region_id,
01504                        \$start_seq_region, \$start_length, \$ori,
01505                        \$mid_start, \$mid_end, \$mid_seq_region_id,
01506                        \$mid_seq_region, \$mid_length);
01507 
01508   }
01509 
01510   my ( $mid_cs_id, $start_cs_id, $reg, $mapper );
01511   if ( !defined $mid_cs ) {
01512     $mid_cs_id   = $last_cs->dbID();
01513     $start_cs_id = $first_cs->dbID();
01514     $mapper      = $combined_mapper;
01515   } else {
01516     $mid_cs_id   = $mid_cs->dbID();
01517     $start_cs_id = $first_cs->dbID();
01518     $mapper      = $start_mid_mapper;
01519   }
01520 
01521   $reg =  $casm_mapper->first_registry();
01522 
01523   while($sth->fetch()) {
01524     $mapper->add_map_coordinates
01525       (
01526        $start_seq_region_id, $start_start, $start_end, $ori,
01527        $mid_seq_region_id, $mid_start, $mid_end
01528       );
01529     push( @ranges, [$start_seq_region_id, $start_start, $start_end ] );
01530 
01531     $reg->check_and_register( $start_seq_region_id, 1, $start_length );
01532     if( ! defined $mid_cs ) {
01533       $casm_mapper->last_registry()->check_and_register
01534     ( $mid_seq_region_id, $mid_start, $mid_end );
01535     }
01536 
01537     my $arr = [ $mid_seq_region_id, $mid_seq_region,
01538                 $mid_cs_id, $mid_length ];
01539 
01540     $self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr;
01541     $self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr;
01542 
01543     $arr = [ $start_seq_region_id, $start_seq_region,
01544              $start_cs_id, $start_length ];
01545 
01546     $self->{'sr_name_cache'}->{"$start_seq_region:$start_cs_id"} = $arr;
01547     $self->{'sr_id_cache'}->{"$start_seq_region_id"} = $arr;
01548   }
01549 
01550   if( ! defined $mid_cs ) {
01551     # thats it for the simple case
01552     return;
01553   }
01554 
01555 
01556   @path = @{ $csa->get_mapping_path( $last_cs, $mid_cs ) };
01557   if ( defined($mid_cs) ) {
01558     if ( !defined( $path[1] ) ) {
01559       splice( @path, 1, 1 );
01560     }
01561   }
01562 
01563   if ( @path != 2 ) {
01564     my $path =
01565       join( ',', map( { $_->name . ' ' . $_->version } @path ) );
01566     my $len = scalar(@path) - 1;
01567     throw(   "Unexpected mapping path between intermediate and last "
01568            . "coord systems ("
01569            . $last_cs->name . " "
01570            . $last_cs->version . " and "
01571            . $mid_cs->name . " "
01572            . $mid_cs->version . ")."
01573            . "\nExpected path length 1, got $len. "
01574            . "(path=$path)" );
01575   }
01576 
01577   ($asm_cs,$cmp_cs) = @path;
01578 
01579   $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER);
01580   $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER);
01581   $sth->execute();
01582 
01583 
01584   my ($end_start, $end_end, $end_seq_region_id, $end_seq_region,
01585       $end_length);
01586 
01587   if($asm_cs->equals($mid_cs)) {
01588     $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id,
01589                        \$end_seq_region, \$end_length, \$ori,
01590                        \$mid_start, \$mid_end, \$mid_seq_region_id,
01591                        \$mid_seq_region, \$mid_length);
01592   } else {
01593     $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
01594                        \$mid_seq_region, \$mid_length, \$ori, \$end_start,
01595                        \$end_end, \$end_seq_region_id, \$end_seq_region,
01596                        \$end_length);
01597   }
01598 
01599   my $end_cs_id = $last_cs->dbID();
01600   $reg = $casm_mapper->last_registry();
01601 
01602   while($sth->fetch()) {
01603     $end_mid_mapper->add_map_coordinates
01604       (
01605        $end_seq_region_id, $end_start, $end_end, $ori,
01606        $mid_seq_region_id, $mid_start, $mid_end
01607       );
01608 
01609     $reg->check_and_register( $end_seq_region_id, 1, $end_length );
01610 
01611     my $arr = [ $end_seq_region_id, $end_seq_region,
01612              $end_cs_id, $end_length ];
01613     $self->{'sr_name_cache'}->{"$end_seq_region:$end_cs_id"} = $arr;
01614     $self->{'sr_id_cache'}->{"$end_seq_region_id"} = $arr;
01615   }
01616 
01617   _build_combined_mapper( \@ranges, $start_mid_mapper, $end_mid_mapper,
01618                           $combined_mapper, "first" );
01619 
01620   return;
01621 }
01622 
01623 
01624 
01625 # after both halves of a chained mapper are loaded
01626 # this function maps all ranges in $ranges and loads the
01627 # results into the combined mapper
01628 sub _build_combined_mapper {
01629   my $ranges = shift;
01630   my $start_mid_mapper = shift;
01631   my $end_mid_mapper = shift;
01632   my $combined_mapper = shift;
01633   my $start_name = shift;
01634 
01635   my $mid_name = "middle";
01636 
01637   foreach my $range (@$ranges) {
01638     my ( $seq_region_id, $start, $end) = @$range;
01639 
01640     my $sum = 0;
01641 
01642     my @initial_coords = $start_mid_mapper->map_coordinates($seq_region_id,
01643                                                             $start,$end,1,
01644                                                             $start_name);
01645 
01646     foreach my $icoord (@initial_coords) {
01647       #skip gaps
01648       if($icoord->isa('Bio::EnsEMBL::Mapper::Gap')) {
01649         $sum += $icoord->length();
01650         next;
01651       }
01652 
01653 
01654       #feed the results of the first mapping into the second mapper
01655       my @final_coords =
01656         $end_mid_mapper->map_coordinates($icoord->id, $icoord->start,
01657                                          $icoord->end,
01658                                          $icoord->strand, $mid_name);
01659 
01660 
01661       foreach my $fcoord (@final_coords) {
01662         #load up the final mapper
01663 
01664         if($fcoord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
01665           my $total_start = $start + $sum;
01666           my $total_end   = $total_start + $fcoord->length - 1;
01667           my $ori = $fcoord->strand();
01668 
01669           if($start_name eq 'first') { # add coords in consistant order
01670             $combined_mapper->add_map_coordinates(
01671                              $seq_region_id, $total_start, $total_end, $ori,
01672                              $fcoord->id(), $fcoord->start(), $fcoord->end());
01673           } else {
01674             $combined_mapper->add_map_coordinates(
01675                         $fcoord->id(), $fcoord->start(), $fcoord->end(),$ori,
01676                         $seq_region_id, $total_start, $total_end);
01677           }
01678 
01679         }
01680         $sum += $fcoord->length();
01681       }
01682     }
01683   }
01684   #all done!
01685 }
01686 
01687 
01688 =head2 seq_regions_to_ids
01689 
01690   Arg [1]    : Bio::EnsEMBL::CoordSystem $coord_system
01691   Arg [2]    : listref of strings $seq_regions
01692   Example    : my @ids = @{$asma->seq_regions_to_ids($coord_sys, \@seq_regs)};
01693   Description: Converts a list of seq_region names to internal identifiers
01694                using the internal cache that has accumulated while registering
01695                regions for AssemblyMappers. If any requested regions are
01696                not  found in the cache an attempt is made to retrieve them
01697                from the database.
01698   Returntype : listref of ints
01699   Exceptions : throw if a non-existant seqregion is provided
01700   Caller     : general
01701   Status     : Stable
01702 
01703 =cut
01704 
01705 sub seq_regions_to_ids {
01706   my $self = shift;
01707   my $coord_system = shift;
01708   my $seq_regions = shift;
01709 
01710   my $cs_id = $coord_system->dbID();
01711 
01712   my @out;
01713 
01714   foreach my $sr (@$seq_regions) {
01715     my $arr = $self->{'sr_name_cache'}->{"$sr:$cs_id"};
01716     if( $arr ) {
01717       push( @out, $arr->[0] );
01718     } else {
01719       push @out, $self->_seq_region_name_to_id($sr,$cs_id);
01720     }
01721   }
01722 
01723   return \@out;
01724 }
01725 
01726 
01727 =head2 seq_ids_to_regions
01728 
01729   Arg [1]    : listref of   seq_region ids
01730   Example    : my @ids = @{$asma->ids_to_seq_regions(\@seq_ids)};
01731   Description: Converts a list of seq_region ids to seq region names
01732                using the internal cache that has accumulated while registering
01733                regions for AssemblyMappers. If any requested regions are
01734                not  found in the cache an attempt is made to retrieve them
01735                from the database.
01736   Returntype : listref of strings
01737   Exceptions : throw if a non-existant seq_region_id is provided
01738   Caller     : general
01739   Status     : Stable
01740 
01741 =cut
01742 
01743 sub seq_ids_to_regions {
01744   my $self = shift;
01745   my $seq_region_ids = shift;
01746 
01747   my @out;
01748 
01749   foreach my $sr (@$seq_region_ids) {
01750     my $arr = $self->{'sr_id_cache'}->{"$sr"};
01751     if( $arr ) {
01752       push( @out, $arr->[1] );
01753     } else {
01754       push @out, $self->_seq_region_id_to_name($sr);
01755     }
01756   }
01757 
01758   return \@out;
01759 }
01760 
01761 =head2 delete_cache
01762 
01763  Description: Delete all the caches for the mappings/seq_regions
01764  Returntype : none
01765  Exceptions : none
01766  Caller     : General
01767  Status     : At risk
01768 
01769 =cut
01770 
01771 sub delete_cache{
01772   my ($self) = @_;
01773 
01774   $self->{'sr_name_cache'}     = {};
01775   $self->{'sr_id_cache'}       = {};
01776 
01777   foreach my $key (keys %{$self->{'_asm_mapper_cache'}}){
01778     $self->{'_asm_mapper_cache'}->{$key}->flush();
01779   }
01780   $self->{'_asm_mapper_cache'} = {};
01781 }
01782 
01783 
01784 =head2 register_region
01785 
01786   Description: DEPRECATED use register_assembled instead
01787 
01788 =cut
01789 
01790 sub register_region{
01791   my ($self, $assmapper, $type, $chr_name, $start, $end) = @_;
01792 
01793   deprecate('Use register_assembled instead');
01794 
01795   $self->register_assembled($assmapper, $chr_name, $start, $end);
01796 }
01797 
01798 
01799 =head2 register_contig
01800 
01801   Description: DEPRECATED use register_component instead
01802 
01803 =cut
01804 
01805 sub register_contig {
01806    my ($self, $assmapper, $type, $contig_id ) = @_;
01807 
01808    deprecate('Use register_component instead');
01809 
01810    #not sure if the use is passing in a seq_region_name or a
01811    #seq_region_id...
01812    register_component($assmapper, $contig_id);
01813 }
01814 
01815 
01816 =head2 fetch_by_type
01817 
01818   Description: DEPRECATED use fetch_by_CoordSystems instead
01819 
01820 =cut
01821 
01822 sub fetch_by_type{
01823   my ($self,$type) = @_;
01824 
01825   deprecate('Use fetch_by_CoordSystems instead');
01826 
01827   #assume that what the user wanted was a mapper between the sequence coord
01828   #level and the top coord level
01829 
01830   my $csa = $self->db()->get_CoordSystemAdaptor();
01831 
01832   my $cs1 = $csa->fetch_top_level($type);
01833   my $cs2 = $csa->fetch_sequence_level();
01834 
01835   return $self->fetch_by_CoordSystems($cs1,$cs2);
01836 }
01837 
01838 
01839 
01840 1;