Archive Ensembl HomeArchive Ensembl Home
EnsemblGeneGeneric.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric - default Ensembl
00024 InternalIdMapper implementation for genes
00025 
00026 =head1 SYNOPSIS
00027 
00028 =head1 DESCRIPTION
00029 
00030 =head1 METHODS
00031 
00032 =cut
00033 
00034 package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric;
00035 
00036 use strict;
00037 use warnings;
00038 no warnings 'uninitialized';
00039 
00040 use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
00041 our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
00042 
00043 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00044 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
00045 
00046 
#
# Basic mapping: first pass over the gene score matrix.
#
# Arguments (positional):
#   $num         - suffix used to build the cache labels
#   $gsb         - object providing create_shrinked_matrix()
#   (4th)        - incoming mappings; ignored, this step produces its own
#   $gene_scores - gene score matrix to map from
#
# Returns the list (shrunken score matrix, mappings).
#
sub init_basic {
  # The fourth positional argument is accepted for call-signature parity but
  # never read, hence the undef placeholder.
  my ($self, $num, $gsb, undef, $gene_scores) = @_;

  $self->logger->info("Basic gene mapping...\n", 0, 'stamped');

  # The mapping label uses the incoming $num, the matrix label the
  # incremented one.
  my $mapping_label = "gene_mappings$num";
  $num++;
  my $matrix_label = "gene_matrix$num";

  my $fresh_mappings = $self->basic_mapping($gene_scores, $mapping_label);
  my $reduced_scores =
    $gsb->create_shrinked_matrix($gene_scores, $fresh_mappings, $matrix_label);

  return ($reduced_scores, $fresh_mappings);
}
00066 
00067 
#
# Synteny-assisted mapping: build a synteny framework from the mappings found
# so far, use it to rescore the gene matrix, then map again.
#
# Arguments (positional):
#   $num         - suffix used to build the cache labels
#   $gsb         - object providing create_shrinked_matrix()
#   $mappings    - mappings passed to SyntenyFramework::build_synteny()
#   $gene_scores - gene score matrix to rescore and map from
#
# The framework build and rescoring are skipped when $gene_scores->loaded is
# true (presumably the matrix was restored from an earlier checkpoint --
# confirm against the matrix class).
#
# Returns the list (shrunken score matrix, new mappings).
#
sub synteny {
  my $self = shift;
  my $num = shift;
  my $gsb = shift;
  my $mappings = shift;
  my $gene_scores = shift;

  unless ($gene_scores->loaded) {
    # SyntenyFramework is not among this file's 'use' lines, so load it on
    # demand; otherwise the constructor call below only works if some other
    # module happened to load it first. require is a no-op when it is
    # already loaded.
    require Bio::EnsEMBL::IdMapping::SyntenyFramework;

    $self->logger->info("Synteny Framework building...\n", 0, 'stamped');
    my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
    my $sf = Bio::EnsEMBL::IdMapping::SyntenyFramework->new(
      -DUMP_PATH    => $dump_path,
      -CACHE_FILE   => 'synteny_framework.ser',
      -LOGGER       => $self->logger,
      -CONF         => $self->conf,
      -CACHE        => $self->cache,
    );
    $sf->build_synteny($mappings);

    # use it to rescore the genes
    $self->logger->info("\nSynteny assisted mapping...\n", 0, 'stamped');
    $gene_scores = $sf->rescore_gene_matrix_lsf($gene_scores);

    # checkpoint
    $gene_scores->write_to_file;
  }

  # The mapping label uses the incoming $num, the matrix label the
  # incremented one.
  my $new_mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");
  $num++;
  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $new_mappings,
    "gene_matrix$num");

  return ($new_scores, $new_mappings);
}
00105 
00106 
#
# Retry the mapping after rescoring genes with the simple best-transcript
# scoring function.
#
# Arguments (positional):
#   $num               - suffix used to build the cache labels
#   $gsb               - object providing simple_gene_rescore() and
#                        create_shrinked_matrix()
#   (4th)              - incoming mappings; not used by this step
#   $gene_scores       - gene score matrix, rescored in place unless
#                        $gene_scores->loaded is true
#   $transcript_scores - transcript scores handed to the rescoring call
#
# Returns the list (shrunken score matrix, new mappings).
#
sub best_transcript {
  my ($self, $num, $gsb, undef, $gene_scores, $transcript_scores) = @_;

  $self->logger->info("Retry with simple best transcript score...\n", 0, 'stamped');

  if (!$gene_scores->loaded) {
    $gsb->simple_gene_rescore($gene_scores, $transcript_scores);
    # checkpoint the rescored matrix
    $gene_scores->write_to_file;
  }

  # The mapping label uses the incoming $num, the matrix label the
  # incremented one.
  my $mapping_label = "gene_mappings$num";
  $num++;
  my $matrix_label = "gene_matrix$num";

  my $fresh_mappings = $self->basic_mapping($gene_scores, $mapping_label);
  my $reduced_scores =
    $gsb->create_shrinked_matrix($gene_scores, $fresh_mappings, $matrix_label);

  return ($reduced_scores, $fresh_mappings);
}
00132 
00133 
#
# Retry the mapping after rescoring to penalise scores between genes with
# different biotypes.
#
# Arguments (positional):
#   $num         - suffix used to build the cache labels
#   $gsb         - object providing biotype_gene_rescore() and
#                  create_shrinked_matrix()
#   (4th)        - incoming mappings; not used by this step
#   $gene_scores - gene score matrix, rescored in place unless
#                  $gene_scores->loaded is true
#
# Returns the list (shrunken score matrix, new mappings).
#
sub biotype {
  my ($self, $num, $gsb, undef, $gene_scores) = @_;

  $self->logger->info("Retry with biotype disambiguation...\n", 0, 'stamped');

  if (!$gene_scores->loaded) {
    $gsb->biotype_gene_rescore($gene_scores);
    # checkpoint the rescored matrix
    $gene_scores->write_to_file;
  }

  # The mapping label uses the incoming $num, the matrix label the
  # incremented one.
  my $mapping_label = "gene_mappings$num";
  $num++;
  my $matrix_label = "gene_matrix$num";

  my $fresh_mappings = $self->basic_mapping($gene_scores, $mapping_label);
  my $reduced_scores =
    $gsb->create_shrinked_matrix($gene_scores, $fresh_mappings, $matrix_label);

  return ($reduced_scores, $fresh_mappings);
}
00158 
00159 
#
# Retry the mapping after selectively rescoring to penalise scores between
# genes with different internal IDs.
#
# Arguments (positional):
#   $num         - suffix used to build the cache labels
#   $gsb         - object providing internal_id_rescore() and
#                  create_shrinked_matrix()
#   (4th)        - incoming mappings; not used by this step
#   $gene_scores - gene score matrix, rescored in place unless
#                  $gene_scores->loaded is true
#
# Returns the list (shrunken score matrix, new mappings).
#
sub internal_id {
  my ($self, $num, $gsb, undef, $gene_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');

  if (!$gene_scores->loaded) {
    $gsb->internal_id_rescore($gene_scores);
    # checkpoint the rescored matrix
    $gene_scores->write_to_file;
  }

  # The mapping label uses the incoming $num, the matrix label the
  # incremented one.
  my $mapping_label = "gene_mappings$num";
  $num++;
  my $matrix_label = "gene_matrix$num";

  my $fresh_mappings = $self->basic_mapping($gene_scores, $mapping_label);
  my $reduced_scores =
    $gsb->create_shrinked_matrix($gene_scores, $fresh_mappings, $matrix_label);

  return ($reduced_scores, $fresh_mappings);
}
00185 
00186 
00187 1;
00188