Archive Ensembl HomeArchive Ensembl Home
PredictionTranscript.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 PredictionTranscript
00024 
00025 =head1 SYNOPSIS
00026 
00027 =head1 DESCRIPTION
00028 
00029 Container for single transcript ab initio gene prediction such as
00030 GenScan or SNAP. Is directly storable/retrievable in Ensembl using
00031 PredictionTranscriptAdaptor.
00032 
00033 Creation:
00034 
00035   my $tran = new Bio::EnsEMBL::PredictionTranscript();
00036   $tran->add_Exon($pred_exon);
00037 
00038   my $tran =
00039     new Bio::EnsEMBL::PredictionTranscript( -EXONS => \@pred_exons );
00040 
00041 Manipulation:
00042 
00043   # Returns an array of PredictionExon objects
00044   my @pred_exons = @{ $tran->get_all_Exons };
00045 
00046   # Returns the peptide translation as string
00047   my $pep = $tran->translate()->seq();
00048 
00049   # Get the exon cdna sequence.
00050   my $cdna = $tran->spliced_seq();
00051 
00052 =head1 METHODS
00053 
00054 =cut
00055 
00056 package Bio::EnsEMBL::PredictionTranscript;
00057 
00058 use vars qw(@ISA);
00059 use strict;
00060 
00061 use Bio::EnsEMBL::Feature;
00062 use Bio::EnsEMBL::Transcript;
00063 use Bio::EnsEMBL::Translation;
00064 
00065 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
00066 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
00067 
00068 @ISA = qw(Bio::EnsEMBL::Transcript);
00069 
00070 
00071 =head2 new
00072 
00073   Arg [-DISPLAY_LABEL]
00074     string - a displayable identifier for this prediction
00075   Arg [...]  : See Bio::EnsEMBL::Transcript superclass constructor
00076   Example    : $pt = Bio::EnsEMBL::PredictionTranscript->new
00077                   ( '-start'         =>  $seq_region_start,
00078                     '-end'           =>  $seq_region_end,
00079                     '-strand'        =>  $seq_region_strand,
00080                     '-adaptor'       =>  $self,
00081                     '-slice'         =>  $slice,
00082                     '-analysis'      =>  $analysis,
00083                     '-dbID'          =>  $prediction_transcript_id,
00084                     '-display_label' =>  $display_label);
00085   Description: Constructor. Creates a new Bio::EnsEMBL::PredictionTranscript
00086                object
00087   Returntype : Bio::EnsEMBL::PredictionTranscript
00088   Exceptions : none
00089   Caller     : general
00090   Status     : Stable
00091 
00092 =cut
00093 
# Constructor.  The object itself is built by the superclass constructor,
# which receives the complete, unmodified argument list; afterwards the
# optional -DISPLAY_LABEL argument is picked out of the same list and stored
# on the new instance (undef when it was not supplied).
sub new {
  my $class = shift;

  my $self = $class->SUPER::new(@_);

  # rearrange() is asked for the single named argument DISPLAY_LABEL.
  ( $self->{'display_label'} ) = rearrange( ['DISPLAY_LABEL'], @_ );

  return $self;
}
00105 
00106 
00107 =head2 coding_region_start
00108 
00109   Arg [1]    : none
00110   Example    : $coding_region_start = $pt->coding_region_start
00111   Description: Retrieves the start of the coding region of this transcript in
00112                slice coordinates.  For prediction transcripts this
00113                is always the start of the transcript (i.e. there is no UTR).
00114                By convention, the coding_region_start is always lower than
00115                the value returned by the coding_region_end method.
00116                The value returned by this function is NOT the biological
00117                coding start since on the reverse strand the biological coding
00118                start would be the higher genomic value.
00119   Returntype : int
00120   Exceptions : none
00121   Caller     : general
00122   Status     : Stable
00123 
00124 =cut
00125 
# Returns the lower slice coordinate of the coding region.  No UTRs are
# stored for prediction transcripts, so this is simply the transcript start.
sub coding_region_start {
  my ($self) = @_;

  return $self->start;
}
00130 
00131 
00132 =head2 coding_region_end
00133 
00134   Arg [1]    : none
00135   Example    : $coding_region_end = $transcript->coding_region_end
00136   Description: Retrieves the end of the coding region of this prediction
00137                transcript. For prediction transcripts this is always the same
00138                as the end since no UTRs are stored.
00139                By convention, the coding_region_end is always higher than the
00140                value returned by the coding_region_start method.
00141                The value returned by this function is NOT the biological
00142                coding end since on the reverse strand the biological coding
00143                end would be the lower genomic value.
00144   Returntype : int
00145   Exceptions : none
00146   Caller     : general
00147   Status     : Stable
00148 
00149 =cut
00150 
# Returns the higher slice coordinate of the coding region.  No UTRs are
# stored for prediction transcripts, so this is simply the transcript end.
sub coding_region_end {
  my ($self) = @_;

  return $self->end;
}
00155 
00156 
00157 
00158 =head2 get_all_translateable_Exons
00159 
00160   Arg [1]    : none
00161   Example    : $exons = $self->get_all_translateable_Exons
00162   Description: Retrieves the translateable portion of all exons in this
00163                transcript.  For prediction transcripts this means all exons
00164                since no UTRs are stored for them.
00165   Returntype : listref of Bio::EnsEMBL::PredictionExons
00166   Exceptions : none
00167   Caller     : general
00168   Status     : Stable
00169 
00170 =cut
00171 
# Every exon of a prediction transcript is translateable (no UTRs are
# stored), so this just delegates to get_all_Exons() and returns its result.
sub get_all_translateable_Exons {
  my ($self) = @_;

  return $self->get_all_Exons;
}
00176 
00177 
00178 =head2 display_label
00179 
00180   Arg [1]    : string $newval (optional)
00181                The new value to set the display_label attribute to
00182   Example    : $display_label = $pt->display_label()
00183   Description: Getter/Setter for a displayable identifier for this
00184                prediction transcript.
00185   Returntype : string
00186   Exceptions : none
00187   Caller     : general
00188   Status     : Stable
00189 
00190 =cut
00191 
# Combined getter/setter for the display label.  When called with an
# argument (even an undefined one) the stored label is replaced first; the
# currently stored label is returned in either case.
sub display_label {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'display_label'} = $new_value[0];
  }

  return $self->{'display_label'};
}
00197 
00198 
00199 
00200 =head2 stable_id
00201 
00202   Arg [1]    : none
00203   Example    : print $pt->stable_id();
00204   Description: Gets a 'stable' identifier for this prediction transcript.  Note
00205                that prediction transcripts do not have true *stable*
00206                identifiers (i.e. identifiers maintained between releases).
00207                This method chains to the display_label method and is intended
00208                to provide polymorphism with the Transcript class.
00209   Returntype : string
00210   Exceptions : none
00211   Caller     : general
00212   Status     : Stable
00213 
00214 =cut
00215 
# Stand-in for Transcript's stable_id(): prediction transcripts have no true
# stable identifier, so the display label is used instead.  Any arguments
# are passed straight through, which means calling this with a value sets
# the display label.
#
# display_label() is invoked as a method (not as a plain function) so that
# a subclass overriding display_label() is respected here as well.
sub stable_id {
  my $self = shift;

  return $self->display_label(@_);
}
00217 
# The following methods return constant "nothing there" values: empty
# listrefs, undef, or 0.  They presumably shadow the cross-reference methods
# inherited from Bio::EnsEMBL::Transcript -- confirm against that class.

# Always an empty listref.
sub get_all_DBEntries {
  return [];
}

# Always an empty listref.
sub get_all_DBLinks {
  return [];
}

# Accepts and ignores any arguments; nothing is stored.
sub add_DBEntry {
}

# Always undef.
sub external_db {
  return undef;
}

# Always undef.
sub external_status {
  return undef;
}

# Always undef.
sub external_name {
  return undef;
}

# Always false (0).
sub is_known {
  return 0;
}
00231 
00232 
00233 =head2 translation
00234 
00235   Arg [1]    : none
00236   Example    : $translation = $pt->translation();
00237   Description: Retrieves a Bio::EnsEMBL::Translation object for this prediction
00238                transcript.  Note that this translation is generated on the fly
00239                and is not stored in the database.  The translation always
00240                spans the entire transcript (no UTRs; all CDS) and does not
00241                have an associated dbID, stable_id or adaptor.
00242   Returntype : Bio::EnsEMBL::Translation (undef if there are no exons)
00243   Exceptions : none
00244   Caller     : general
00245   Status     : Stable
00246 
00247 =cut
00248 
# Builds a Bio::EnsEMBL::Translation for this prediction transcript on the
# fly.  The translation spans the whole transcript: it starts at position 1
# of the first exon and ends at the last base of the last exon.
#
# Returns undef when the transcript has no exons.  get_all_Exons() can
# return undef (no exons attached and no adaptor to load them from); that
# case is treated like an empty exon list instead of dying on the array
# dereference.
sub translation {
  my $self = shift;

  my @exons = @{ $self->get_all_Exons() || [] };

  return undef if !@exons;

  my $start_exon = $exons[0];
  my $end_exon   = $exons[-1];

  # A translation adaptor can only be obtained through this transcript's
  # own adaptor; without one the Translation is created with an undefined
  # adaptor.
  my $pta;
  if ( $self->adaptor() ) {
    $pta = $self->adaptor()->db()->get_TranslationAdaptor();
  }

  # When the first exon has a positive phase, pad the front of the spliced
  # sequence with that many Ns before translating.
  my $Xseq        = $self->spliced_seq();
  my $start_phase = $start_exon->phase;
  if ( $start_phase > 0 ) {
    $Xseq = ( 'N' x $start_phase ) . $Xseq;
  }

  my $tmpSeq = Bio::Seq->new( -id       => $self->display_id,
                              -seq      => $Xseq,
                              -moltype  => 'dna',
                              -alphabet => 'dna' );

  return Bio::EnsEMBL::Translation->new
    (-ADAPTOR    => $pta,
     -START_EXON => $start_exon,
     -END_EXON   => $end_exon,
     -SEQ_START  => 1,
     -SEQ_END    => $end_exon->length(),
     -SEQ        => $tmpSeq->translate()->seq());
}
00293 
00294 
00295 
00296 =head2 translate
00297 
00298   Args      : none
00299   Function  : Give a peptide translation of all exons currently in
00300               the PT. Gives empty string when none is in.
00301   Returntype: a Bio::Seq as in transcript->translate()
00302   Exceptions: none
00303   Caller    : general
00304   Status     : Stable
00305 
00306 =cut
00307 
00308 
# Translates the translateable sequence of this prediction transcript and
# returns whatever Bio::Seq's translate() produces for it.
#
# The codon table is taken from the 'codon_table' attribute of the slice
# when there is a slice carrying one; otherwise table 1 is used.  If the
# sequence length is a multiple of three and the final codon is a stop codon
# in that table, the stop codon is removed before translating, so that the
# peptide has no terminal stop.  Callers that want the terminal stop should
# translate the output of translateable_seq() themselves.
sub translate {
  my $self = shift;

  my $dna = $self->translateable_seq();

  my $codon_table_id;
  if ( defined $self->slice() ) {
    my ($attrib) = @{ $self->slice()->get_all_Attributes('codon_table') };
    $codon_table_id = $attrib->value() if defined $attrib;
  }
  $codon_table_id ||= 1;    # table 1 is the default

  if ( CORE::length($dna) % 3 == 0 ) {
    my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id );
    my $last_codon  = substr( $dna, -3, 3 );

    if ( $codon_table->is_ter_codon($last_codon) ) {
      substr( $dna, -3, 3, '' );
    }
  }

  my $bioseq = Bio::Seq->new( -id       => $self->display_id,
                              -seq      => $dna,
                              -moltype  => 'dna',
                              -alphabet => 'dna' );

  # Assigned to a scalar first so translate() is always called in scalar
  # context, whatever context the caller used.
  my $peptide = $bioseq->translate( undef, undef, undef, $codon_table_id );

  return $peptide;
}
00348 
00349 
00350 =head2 cdna_coding_start
00351 
00352   Arg [1]    : none
00353   Example    : $relative_coding_start = $transcript->cdna_coding_start();
00354   Description: Retrieves the position of the coding start of this transcript
00355                in cdna coordinates (relative to the start of the 5prime end of
00356                the transcript, excluding introns, including utrs). This is
00357                always 1 for prediction transcripts because they have no UTRs.
00358   Returntype : int
00359   Exceptions : none
00360   Caller     : five_prime_utr, get_all_snps, general
00361   Status     : Stable
00362 
00363 =cut
00364 
# Constant 1: with no UTRs stored, the coding sequence begins at the first
# cDNA base.
sub cdna_coding_start {
  return 1;
}
00366 
00367 
00368 
00369 =head2 cdna_coding_end
00370 
00371   Arg [1]    : none
00372   Example    : $relative_coding_end = $transcript->cdna_coding_end();
00373   Description: Retrieves the position of the coding end of this transcript
00374                in cdna coordinates (relative to the start of the 5prime end of
00375                the transcript, excluding introns, including utrs). This is
00376                always the length of the cdna for prediction transcripts because
00377                they have no UTRs.
00378   Returntype : int
00379   Exceptions : none
00380   Caller     : five_prime_utr, get_all_snps, general
00381   Status     : Stable
00382 
00383 =cut
00384 
# The coding region ends at the last cDNA base, so the answer is the length
# of the spliced sequence.
sub cdna_coding_end {
  my $self = shift;

  my $cdna = $self->spliced_seq();

  # CORE:: makes it explicit that the string builtin is meant.
  return CORE::length($cdna);
}
00389 
00390 
00391 =head2 transform
00392 
00393   Arg  1     : String $coordinate_system_name
00394   Arg [2]    : String $coordinate_system_version
00395   Example    : $ptrans = $ptrans->transform('chromosome', 'NCBI33');
00396                $ptrans = $ptrans->transform('clone');
00397   Description: Moves this PredictionTranscript to the given coordinate system.
00398                If this Transcript has Exons attached, they move as well.
00399                A new Transcript is returned or undefined if this PT is not
00400                defined in the new coordinate system.
00401   Returntype : Bio::EnsEMBL::PredictionTranscript
00402   Exceptions : wrong parameters
00403   Caller     : general
00404   Status     : Stable
00405 
00406 =cut
00407 
# Converts this prediction transcript to another coordinate system.
#
# The transcript itself is converted by Bio::EnsEMBL::Feature::transform
# (called directly as a function); undef is returned when that yields
# nothing.  Exons already held on this object are converted with the same
# arguments and attached to the new transcript.
#
# Throws if the first argument is a slice object (the old calling style).
sub transform {
  my $self = shift;

  my $first_arg = $_[0];
  if ( ref $first_arg
       && (    $first_arg->isa("Bio::EnsEMBL::Slice")
            || $first_arg->isa("Bio::EnsEMBL::LRGSlice") ) )
  {
    throw("transform needs coordinate systems details now," .
          "please use transfer");
  }

  my $moved = Bio::EnsEMBL::Feature::transform( $self, @_ );
  if ( !$moved ) {
    return undef;
  }

  # The hash slot is read directly rather than via get_all_Exons(), so that
  # exons which have not been loaded yet are not fetched just for this.
  if ( exists $self->{'_trans_exon_array'} ) {
    my @moved_exons;
    for my $exon ( @{ $self->{'_trans_exon_array'} } ) {
      push @moved_exons, scalar( $exon->transform(@_) );
    }
    $moved->{'_trans_exon_array'} = \@moved_exons;
  }

  return $moved;
}
00432 
00433 
00434 
00435 =head2 transfer
00436 
00437   Arg  1     : Bio::EnsEMBL::Slice $destination_slice
00438   Example    : $ptrans = $ptrans->transfer($slice);
00439   Description: Moves this PredictionTranscript to the given slice.
00440                If this Transcript has Exons attached, they move as well.
00441                If this transcript cannot be moved then undef is returned
00442                instead.
00443   Returntype : Bio::EnsEMBL::PredictionTranscript
00444   Exceptions : none
00445   Caller     : general
00446   Status     : Stable
00447 
00448 =cut
00449 
# Moves this prediction transcript onto another slice.
#
# The transcript itself is moved by the superclass transfer(); undef is
# returned when that yields nothing.  Exons already held on this object are
# transferred with the same arguments and attached to the new transcript.
sub transfer {
  my $self = shift;

  my $moved = $self->SUPER::transfer(@_);
  if ( !$moved ) {
    return undef;
  }

  # Only exons already present on this object are carried over; the hash
  # slot is read directly rather than through get_all_Exons().
  if ( exists $self->{'_trans_exon_array'} ) {
    my @moved_exons;
    foreach my $exon ( @{ $self->{'_trans_exon_array'} } ) {
      push @moved_exons, scalar( $exon->transfer(@_) );
    }
    $moved->{'_trans_exon_array'} = \@moved_exons;
  }

  return $moved;
}
00468 
00469 =head2 get_all_Exons
00470 
00471   Arg [1]    : none
00472   Example    : my @exons = @{$transcript->get_all_Exons()};
00473   Description: Returns a listref of the exons in this transcript in order.
00474                i.e. the first exon in the listref is the 5prime most exon in 
00475                the transcript.
00476   Returntype : a list reference to Bio::EnsEMBL::Exon objects
00477   Exceptions : none
00478   Caller     : general
00479   Status     : Stable
00480 
00481 =cut
00482 
# Returns the stored exon listref, loading it on first use.
#
# If no exon list is stored yet and the object has an adaptor, the exons are
# fetched through the PredictionExonAdaptor and cached on the object.
# Without an adaptor nothing can be loaded and the stored value (possibly
# undef) is returned as is.
sub get_all_Exons {
  my $self = shift;

  # Nothing to do when the exons are already there or cannot be loaded.
  return $self->{'_trans_exon_array'}
    if defined $self->{'_trans_exon_array'} || !defined $self->adaptor();

  my $exon_adaptor = $self->adaptor()->db()->get_PredictionExonAdaptor();
  $self->{'_trans_exon_array'} =
    $exon_adaptor->fetch_all_by_PredictionTranscript($self);

  return $self->{'_trans_exon_array'};
}
00491 
00492 =head2 display_id
00493 
00494   Arg [1]    : none
00495   Example    : print $rf->display_id();
00496   Description: This method returns a string that is considered to be
00497                the 'display' identifier. For prediction transcripts this is
00498                (depending on availability and in this order) the stable Id, the
00499                dbID or an empty string.
00500   Returntype : string
00501   Exceptions : none
00502   Caller     : web drawing code
00503   Status     : Stable
00504 
00505 =cut
00506 
# Picks the identifier to display: the stable_id when it is true, otherwise
# the dbID when that is true, otherwise the empty string.
sub display_id {
  my ($self) = @_;

  if ( my $label = $self->stable_id ) {
    return $label;
  }

  if ( my $db_id = $self->dbID ) {
    return $db_id;
  }

  return '';
}
00511 
00512 =head2 get_all_Attributes
00513 
00514   Arg [1]    : none
00515   Example    :
00516   Description: DOES NOTHING, Returns empty listref. Provided here to prevent
00517                Transcript attributes being returned for PredictionTranscripts.
00518   Returntype : EMPTY listref Bio::EnsEMBL::Attribute
00519   Exceptions : none
00520   Caller     : general
00521   Status     : At risk
00522 
00523 =cut
00524 
# Deliberately inert: always returns a new empty listref and ignores all
# arguments, so no Transcript attributes are reported for prediction
# transcripts.
sub get_all_Attributes {
  return [];
}
00530 
00531 
00532 
00533 =head2 get_exon_count
00534 
00535   Description: DEPRECATED - use get_all_Exons instead
00536 
00537 =cut
00538 
# DEPRECATED.  Issues a deprecation notice and returns the number of exons
# reported by get_all_Exons().
#
# get_all_Exons() can return undef (no exons attached and no adaptor); that
# is counted as zero exons instead of dying on the array dereference.  The
# notice now shows a syntactically valid replacement expression.
sub get_exon_count {
  my $self = shift;

  deprecate('Use scalar(@{$transcript->get_all_Exons()}) instead');

  my $exons = $self->get_all_Exons() || [];

  return scalar @{$exons};
}
00544 
00545 
00546 =head2 set_exon_count
00547 
00548   Description: DEPRECATED - this method does nothing now
00549 
00550 =cut
00551 
# DEPRECATED.  Stores nothing: the only effect is the deprecation notice,
# whose return value is passed back to the caller.
sub set_exon_count {
  return deprecate('This method no longer does anything.');
}
00555 
00556 
00557 
00558 =head2 get_cdna
00559 
00560   Description : DEPRECATED - use spliced_seq() or translateable_seq instead
00561 
00562 =cut
00563 
# DEPRECATED.  Issues a deprecation notice, then returns the result of
# spliced_seq().
sub get_cdna {
  my ($self) = @_;

  deprecate('use spliced_seq instead');

  return $self->spliced_seq;
}
00569 
00570 1;