Archive Ensembl HomeArchive Ensembl Home
EnsemblGeneric.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric - default Ensembl
00024 StableIdGenerator implementation
00025 
00026 =head1 SYNOPSIS
00027 
00028   # inject the configured StableIdGenerator plugin
00029   my $stable_id_generator = $conf->param('plugin_stable_id_generator');
00030   inject($stable_id_generator);
00031 
00032   # create a new StableIdGenerator object
00033   my $generator_instance = $stable_id_generator->new(
00034     -LOGGER => $self->logger,
00035     -CONF   => $self->conf,
00036     -CACHE  => $self->cache
00037   );
00038 
00039   # determine starting stable ID for new assignments
00040   my $new_stable_id = $generator_instance->initial_stable_id('gene');
00041 
00042   # loop over genes
00043   foreach my $target_gene (@all_target_genes) {
00044 
00045     # if the stable Id for this gene was mapped, assign it
00046     if ( $mapping_exists{ $target_gene->id } ) {
00047       my $source_gene = $mappings{ $target_gene->id };
00048       $target_gene->stable_id( $source_gene->stable_id );
00049 
00050       # calculate and set version
00051       my $version =
00052         $generator_instance->calculate_version( $source_gene,
00053         $target_gene );
00054       $target_gene->version($version);
00055 
00056       # no mapping exists, assign a new stable Id
00057     } else {
00058       $target_gene->stable_id($new_stable_id);
00059       $target_gene->version('1');
00060 
00061     # increment the stable Id (to be assigned to the next unmapped gene)
00062       $new_stable_id =
00063         $generator_instance->increment_stable_id($new_stable_id);
00064     }
00065   }
00066 
00067 =head1 DESCRIPTION
00068 
00069 This is the default implementation for a StableIdGenerator, which
00070 is used by Bio::EnsEMBL::IdMapping::StableIdMapper to generate new
00071 stable Ids and increment versions on mapped stable Ids.  Refer to the
00072 documentation in this module if you would like to implement your own
00073 StableIdGenerator.
00074 
00075 The stable Id mapping application allows you to plug in your own
00076 implementation by specifying it with the --plugin_stable_id_generator
00077 configuration parameter.
00078 
00079 Requirements for a StableIdGenerator plugin:
00080 
00081   - inherit from Bio::EnsEMBL::IdMapping::BaseObject
00082   - implement all methods listed in METHODS below (see method POD for
00083     signatures)
00084 
00085 =head1 METHODS
00086 
00087   initial_stable_id
00088   increment_stable_id
00089   calculate_version
00090 
00091 =cut
00092 
00093 package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric;
00094 
00095 use strict;
00096 use warnings;
00097 no warnings 'uninitialized';
00098 
00099 use Bio::EnsEMBL::IdMapping::BaseObject;
00100 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
00101 
00102 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00103 
00104 
=head2 initial_stable_id

  Arg[1]      : String $type - an entity type (gene|transcript|translation|exon)
  Example     : my $new_stable_id = $generator->initial_stable_id('gene');
  Description : Determine the initial stable Id to use for new assignments. This
                method is called once at the beginning of stable Id mapping.

                If the configuration parameter starting_${type}_stable_id is
                set (to a true value), that value is returned unchanged.
                Otherwise the highest stable Id starting with ENS or ASMPATCH
                is looked up in the ${type}_stable_id table of the source
                database and, for every type except exon, in gene_archive;
                the larger of the two (string comparison) is incremented and
                returned.
  Return type : String - a stable Id of appropriate type; a false value (and
                a logged warning) if no stable Id was found in the source db
  Exceptions  : none thrown directly; increment_stable_id() throws if the
                highest stable Id found is not a valid stable Id
  Caller      : Bio::EnsEMBL::IdMapping::StableIdMapper::map_stable_ids()
  Status      : At Risk
              : under development

=cut

sub initial_stable_id {
  my $self = shift;
  my $type = shift;

  my $init_stable_id;

  # use stable ID from configuration if set
  if ( $init_stable_id = $self->conf->param("starting_${type}_stable_id") ) {
    $self->logger->debug("Using pre-configured $init_stable_id as base for new $type stable IDs.\n");
    return $init_stable_id;
  }

  my $s_dba = $self->cache->get_DBAdaptor('source');
  my $s_dbh = $s_dba->dbc->db_handle;

  # look in the ${type}_stable_id table first
  my $sql = qq(
    SELECT MAX(stable_id)
    FROM ${type}_stable_id
    WHERE stable_id LIKE "ENS%"
      OR stable_id LIKE "ASMPATCH%"
    );

  $init_stable_id = $self->fetch_value_from_db( $s_dbh, $sql );

  # Also look in gene_archive to make sure there are no larger Ids there.
  #
  # The archive is filtered on the same prefixes as the query above. An
  # unfiltered MAX() returns the lexically largest Id of *any* kind: an
  # "LRG..." Id would pass is_valid(), win the comparison below and then make
  # increment_stable_id() throw, while any other foreign Id would fail
  # is_valid() and hide a larger archived ENS/ASMPATCH Id.
  unless ( $type eq 'exon' ) {
    $sql = qq(
    SELECT MAX(${type}_stable_id)
    FROM gene_archive
    WHERE ${type}_stable_id LIKE "ENS%"
      OR ${type}_stable_id LIKE "ASMPATCH%"
    );
    my $archived_stable_id = $self->fetch_value_from_db( $s_dbh, $sql );

    # string comparison, consistent with what MAX() does on the SQL side
    if (     $archived_stable_id
         and $self->is_valid($archived_stable_id)
         and ( $archived_stable_id gt $init_stable_id ) )
    {
      $init_stable_id = $archived_stable_id;
    }
  }

  if ($init_stable_id) {
    # since $init_stable_id now is the highest existing stable Id for this
    # object type, we need to increment it to find the first one we want to use
    # for new assignments
    $init_stable_id = $self->increment_stable_id($init_stable_id);

    $self->logger->debug("Using $init_stable_id as base for new $type stable IDs.\n");

  } else {
    $self->logger->warning("Can't find highest ${type}_stable_id in source db.\n");
  }

  return $init_stable_id;
}
00168 
00169 
=head2 increment_stable_id

  Arg[1]      : String $stable_id - the stable Id to increment
  Example     : $next_stable_id = $generator->increment_stable_id(
                  $current_stable_id);
  Description : Increments the stable Id used for new assignments. This method
                is called after each new stable Id assignment to generate the
                next stable Id to be used. The numeric suffix is incremented
                as a string, so its zero padding is preserved.
  Return type : String - the next new stable Id
  Exceptions  : thrown on missing or malformed argument
                thrown if the argument is an LRG stable Id
  Caller      : Bio::EnsEMBL::IdMapping::StableIdMapper::map_stable_ids()
  Status      : At Risk
              : under development

=cut

sub increment_stable_id {
  my ( $self, $stable_id ) = @_;

  unless ( $self->is_valid($stable_id) ) {
    throw( sprintf( "Unknown or missing stable ID '%s'", $stable_id ) );
  }

  # LRG Ids are valid stable Ids, but are never handed out by this generator
  if ( $stable_id =~ /^LRG/ ) {
    throw( sprintf( "We do not increment LRG genes... (got '%s'). "
                      . "Something's wrong.",
                    $stable_id ) );
  }

  # split into prefix, entity/species letters and the numeric part
  my ( $prefix, $letters, $counter ) =
    $stable_id =~ /^(ENS|ASMPATCH)([A-Z]+)(\d+)$/;

  # $counter has only ever been used as a string, so this is Perl's "magic"
  # string increment: "00000000009" becomes "00000000010", keeping the padding
  $counter++;

  return $prefix . $letters . $counter;
}
00207 
00208 
=head2 is_valid

  Arg[1]      : String $stable_id - the stable Id to check
  Example     : unless ($generator->is_valid($stable_id)) {
                  die "Invalid stable Id: $stable_id.\n";
                }
  Description : Tests whether a stable Id is valid. An Id is accepted if it
                consists of the prefix ENS or ASMPATCH, followed by one or
                more upper-case letters and one or more digits, or if it
                starts with LRG.
  Return type : Boolean - 1 if valid, 0 otherwise (including undef)
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub is_valid {
  my $self      = shift;
  my $stable_id = shift;

  # a missing Id is never valid
  return 0 unless defined($stable_id);

  # regular Ensembl / assembly patch stable Ids
  return 1 if $stable_id =~ /^(ENS|ASMPATCH)([A-Z]+)(\d+)$/;

  # LRG Ids only need to carry the LRG prefix
  return 1 if $stable_id =~ /^LRG/;

  return 0;
}
00238 
00239 
=head2 calculate_version

  Arg[1]      : Bio::EnsEMBL::IdMapping::TinyFeature $s_obj - source object
  Arg[2]      : Bio::EnsEMBL::IdMapping::TinyFeature $t_obj - target object
  Example     : my $version = $generator->calculate_version($source_gene,
                  $target_gene);
                $target_gene->version($version);
  Description : Determines the version for a mapped stable Id. The version of
                the source object is taken over and incremented by one if the
                object changed. What counts as a change depends on the type
                of the source object:
                    - exon: the exon sequence changed
                    - transcript: the md5 sum of the sequence changed
                    - translation: the md5 sum of the sequence of its
                      transcript changed
                    - gene: the set of stable_id.version pairs of its
                      transcripts changed
  Return type : String - the version to be used
  Exceptions  : thrown if the source object is of an unknown type
  Caller      : Bio::EnsEMBL::IdMapping::StableIdMapper::map_stable_ids()
  Status      : At Risk
              : under development

=cut

sub calculate_version {
  my ( $self, $s_obj, $t_obj ) = @_;

  my $version = $s_obj->version;

  # true if source and target are considered identical for versioning
  my $unchanged;

  if ( $s_obj->isa('Bio::EnsEMBL::IdMapping::TinyExon') ) {

    # exons: compare the sequences directly
    $unchanged = ( $s_obj->seq eq $t_obj->seq );

  } elsif ( $s_obj->isa('Bio::EnsEMBL::IdMapping::TinyTranscript') ) {

    # transcripts: compare the sequence md5 sums
    $unchanged = ( $s_obj->seq_md5_sum eq $t_obj->seq_md5_sum );

  } elsif ( $s_obj->isa('Bio::EnsEMBL::IdMapping::TinyTranslation') ) {

    # translations: compare the transcripts they belong to, looked up in the
    # cache by their transcript_id
    my $source_transcript =
      $self->cache->get_by_key( 'transcripts_by_id', 'source',
                                $s_obj->transcript_id );
    my $target_transcript =
      $self->cache->get_by_key( 'transcripts_by_id', 'target',
                                $t_obj->transcript_id );

    $unchanged =
      ( $source_transcript->seq_md5_sum eq $target_transcript->seq_md5_sum );

  } elsif ( $s_obj->isa('Bio::EnsEMBL::IdMapping::TinyGene') ) {

    # genes: build "stable_id.version" for every transcript, ordered by
    # stable Id, and compare the resulting strings
    my $transcript_ident = sub {
      my ($gene) = @_;
      my @ordered =
        sort { $a->stable_id cmp $b->stable_id }
        @{ $gene->get_all_Transcripts };
      return join( ":", map { $_->stable_id . '.' . $_->version } @ordered );
    };

    my $s_tr_ident = $transcript_ident->($s_obj);
    my $t_tr_ident = $transcript_ident->($t_obj);

    $unchanged = ( $s_tr_ident eq $t_tr_ident );

  } else {
    throw("Unknown object type: ".ref($s_obj));
  }

  $version++ unless ($unchanged);

  return $version;
}
00306 
00307 
00308 1;
00309