Archive Ensembl HomeArchive Ensembl Home
SeqFeature.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::SeqFeature - Ensembl specific sequence feature.
00024 
00025 =head1 DESCRIPTION
00026 
00027 Do not use this module if you can avoid it. It has been replaced by
00028 Bio::EnsEMBL::Feature.  This module has a long history of usage but has
00029 become very bloated, and quite unweildy.  It was decided to replace
00030 it completely with a smaller, light-weight feature class rather than
00031 attempting to refactor this class, and maintain strict backwards
00032 compatibility.
00033 
00034 Part of the complexity of this class was in its extensive
00035 inheritance. As an example the following is a simplified inheritance
00036 heirarchy that was present for Bio::EnsEMBL::DnaAlignFeature:
00037 
00038   Bio::EnsEMBL::DnaAlignFeature
00039   Bio::EnsEMBL::BaseAlignFeature
00040   Bio::EnsEMBL::FeaturePair
00041   Bio::EnsEMBL::SeqFeature
00042   Bio::EnsEMBL::SeqFeatureI
00043   Bio::SeqFeatureI
00044   Bio::RangeI
00045   Bio::Root::RootI
00046 
00047 The new Bio::EnsEMBL::Feature class is much shorter, and hopefully much
00048 easier to understand and maintain.
00049 
00050 =head1 METHODS
00051 
00052 =cut
00053 
00054 
00055 # Let the code begin...
00056 
00057 
00058 package Bio::EnsEMBL::SeqFeature;
00059 
00060 use vars qw(@ISA);
00061 use strict;
00062 
00063 
00064 use Bio::EnsEMBL::SeqFeatureI;
00065 use Bio::EnsEMBL::Analysis;
00066 use Bio::EnsEMBL::Root;
00067 
00068 @ISA = qw(Bio::EnsEMBL::Root Bio::EnsEMBL::SeqFeatureI);
00069 
00070 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00071 
00072 sub new {
00073   my($caller,@args) = @_;
00074 
00075   my $self = {};
00076 
00077   if(ref $caller) {
00078     bless $self, ref $caller;
00079   } else {
00080     bless $self, $caller;
00081   }
00082 
00083   $self->{'_gsf_tag_hash'} = {};
00084   $self->{'_gsf_sub_array'} = [];
00085   $self->{'_parse_h'} = {};
00086   $self->{'_is_splittable'} = 0;
00087 
00088   my ($start,$end,$strand,$frame,$score,$analysis,$seqname, $source_tag,
00089       $primary_tag, $percent_id, $p_value, $phase, $end_phase) =
00090 
00091       &rearrange([qw(START
00092                             END
00093                             STRAND
00094                             FRAME
00095                             SCORE
00096                             ANALYSIS
00097                             SEQNAME
00098                             SOURCE_TAG
00099                             PRIMARY_TAG
00100                             PERCENT_ID
00101                             P_VALUE
00102                             PHASE
00103                             END_PHASE
00104                             )],@args);
00105 
00106   #  $gff_string && $self->_from_gff_string($gff_string);
00107 
00108   if ( defined $analysis  && $analysis ne "")   { $self->analysis($analysis)};
00109   if ( defined ($start) && $start ne "" )       { $self->start($start)};
00110   if ( defined ($end )  && $end   ne "" )       { $self->end($end)}
00111   if ( defined $strand  && $strand ne "")       { $self->strand($strand)}
00112   if ( defined $frame  && $frame ne "")         { $self->frame($frame)}
00113   if ( defined $score  && $score ne "")         { $self->score($score)}
00114   if ( defined $seqname && $seqname ne "")      { $self->seqname($seqname)};
00115   if ( defined $percent_id && $percent_id ne ""){ $self->percent_id($percent_id)};
00116   if ( defined $p_value && $p_value ne "")      { $self->p_value($p_value)};
00117   if ( defined $phase && $phase ne "")          { $self->phase($phase)};
00118   if ( defined $end_phase && $end_phase ne "")  { $self->end_phase($end_phase)};
00119 
00120   return $self; # success - we hope!
00121 
00122 }
00123 
00124 
00125 
00126 
00127 
00128 
00129 =head2 start
00130 
00131  Title   : start
00132  Usage   : $start = $feat->start
00133            $feat->start(20)
00134  Function: Get/set on the start coordinate of the feature
00135  Returns : integer
00136  Args    : none
00137 
00138 
00139 =cut
00140 
00141 sub start{
00142     my ($self,$value) = @_;
00143 
00144     if (defined($value)) {
00145         if ($value !~ /^\-?\d+/ ) {
00146         $self->throw("$value is not a valid start");
00147     }
00148     
00149     $self->{'_gsf_start'} = $value
00150    }
00151 
00152     return $self->{'_gsf_start'};
00153 
00154 }
00155 
00156 =head2 end
00157 
00158  Title   : end
00159  Usage   : $end = $feat->end
00160            $feat->end($end)
00161  Function: get/set on the end coordinate of the feature
00162  Returns : integer
00163  Args    : none
00164 
00165 
00166 =cut
00167 
00168 sub end{
00169     my ($self,$value) = @_;
00170 
00171     if (defined($value)) {
00172         if( $value !~ /^\-?\d+/ ) {
00173             $self->throw("[$value] is not a valid end");
00174         }
00175     
00176         $self->{'_gsf_end'} = $value;
00177     }
00178 
00179    return $self->{'_gsf_end'};
00180 }
00181 
00182 =head2 length
00183 
00184  Title   : length
00185  Usage   :
00186  Function:
00187  Example :
00188  Returns :
00189  Args    :
00190 
00191 
00192 =cut
00193 
00194 sub length{
00195    my ($self,@args) = @_;
00196 
00197    return $self->end - $self->start +1;
00198 }
00199 
00200 
00201 =head2 strand
00202 
00203  Title   : strand
00204  Usage   : $strand = $feat->strand()
00205            $feat->strand($strand)
00206  Function: get/set on strand information, being 1,-1 or 0
00207  Returns : -1,1 or 0
00208  Args    : none
00209 
00210 
00211 =cut
00212 
00213 sub strand {
00214     my ($self,$value) = @_;
00215 
00216     if (defined($value)) {
00217         if( $value eq '+' ) { $value = 1; }
00218         if( $value eq '-' ) { $value = -1; }
00219         if( $value eq '.' ) { $value = 0; }
00220 
00221         if( $value != -1 && $value != 1 && $value != 0 ) {
00222             $self->throw("$value is not a valid strand info");
00223         }
00224         $self->{'_gsf_strand'} = $value;
00225     }
00226 
00227     return $self->{'_gsf_strand'};
00228 }
00229 
00230 
00231 =head2 move
00232 
00233   Arg [1]    : int $start
00234   Arg [2]    : int $end
00235   Arg [3]    : (optional) int $strand 
00236   Example    : $feature->move(100, 200, -1);
00237   Description: Moves a feature to a different location.  This is faster
00238                then calling 3 seperate accesors in a large loop.
00239   Returntype : none
00240   Exceptions : none
00241   Caller     : BaseFeatureAdaptor
00242 
00243 =cut
00244 
00245 sub move {
00246   my ($self, $start, $end, $strand)  = @_;
00247 
00248   $self->{'_gsf_start'} = $start;
00249   $self->{'_gsf_end'} = $end;
00250   if(defined $strand) {
00251     $self->{'_gsf_strand'} = $strand;
00252   }
00253 }
00254 
00255 
00256 =head2 score
00257 
00258  Title   : score
00259  Usage   : $score = $feat->score()
00260            $feat->score($score)
00261  Function: get/set on score information
00262  Returns : float
00263  Args    : none if get, the new value if set
00264 
00265 
00266 =cut
00267 
00268 sub score {
00269     my ($self,$value) = @_;
00270 
00271     if(defined ($value) ) {
00272       if( $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ ) {
00273           $self->throw("'$value' is not a valid score");
00274       }
00275       $self->{'_gsf_score'} = $value;
00276   }
00277 
00278   return $self->{'_gsf_score'};
00279 }
00280 
00281 =head2 frame
00282 
00283  Title   : frame
00284  Usage   : $frame = $feat->frame()
00285            $feat->frame($frame)
00286  Function: get/set on frame information
00287  Returns : 0,1,2
00288  Args    : none if get, the new value if set
00289 
00290 
00291 =cut
00292 
00293 sub frame {
00294     my ($self,$value) = @_;
00295 
00296     if (defined($value)) {
00297         if( $value != 1 && $value != 2 && $value != 3 ) {
00298             $self->throw("'$value' is not a valid frame");
00299        }
00300         $self->{'_gsf_frame'} = $value;
00301     }
00302 
00303     return $self->{'_gsf_frame'};
00304 }
00305 
00306 =head2 primary_tag
00307 
00308  Title   : primary_tag
00309  Usage   : $tag = $feat->primary_tag()
00310            $feat->primary_tag('exon')
00311  Function: get/set on the primary tag for a feature,
00312            eg 'exon'
00313  Returns : a string
00314  Args    : none
00315 
00316 
00317 =cut
00318 
00319 sub primary_tag{
00320   my ($self,$arg) = @_;
00321   
00322   if (defined($arg)) {
00323     # throw warnings about setting primary tag
00324     my ($p,$f,$l) = caller;
00325     $self->warn("$f:$l setting primary_tag now deprecated." .
00326         "Primary tag is delegated to analysis object");
00327   }
00328 
00329   unless($self->analysis) {
00330     return '';
00331   }
00332   
00333   return $self->analysis->gff_feature();
00334 }
00335 
00336 =head2 source_tag
00337 
00338  Title   : source_tag
00339  Usage   : $tag = $feat->source_tag()
00340            $feat->source_tag('genscan');
00341  Function: Returns the source tag for a feature,
00342            eg, 'genscan'
00343  Returns : a string
00344  Args    : none
00345 
00346 
00347 =cut
00348 
00349 sub source_tag{
00350     my ($self,$arg) = @_;
00351 
00352     if (defined($arg)) {
00353         # throw warnings about setting primary tag
00354         my ($p,$f,$l) = caller;
00355         $self->warn("$f:$l setting source_tag now deprecated. " .
00356             "Source tag is delegated to analysis object");
00357     }
00358 
00359     unless($self->analysis) {
00360       return "";
00361     }
00362 
00363    return $self->analysis->gff_source();
00364 }
00365 
00366 
00367 =head2 analysis
00368 
00369  Title   : analysis
00370  Usage   : $sf->analysis();
00371  Function: Store details of the program/database
00372            and versions used to create this feature.
00373 
00374  Example :
00375  Returns :
00376  Args    :
00377 
00378 
00379 =cut
00380 
00381 sub analysis {
00382    my ($self,$value) = @_;
00383 
00384    if (defined($value)) {
00385      unless(ref($value) && $value->isa('Bio::EnsEMBL::Analysis')) {
00386        $self->throw("Analysis is not a Bio::EnsEMBL::Analysis object "
00387             . "but a $value object");
00388      }
00389 
00390      $self->{_analysis} = $value;
00391    } else {
00392      #if _analysis is not defined, create a new analysis object
00393      unless(defined $self->{_analysis}) {
00394        $self->{_analysis} = new Bio::EnsEMBL::Analysis();
00395      }
00396    }
00397 
00398    return $self->{_analysis};
00399 }
00400 
00401 =head2 validate
00402 
00403  Title   : validate
00404  Usage   : $sf->validate;
00405  Function: Checks whether all the data is present in the
00406            object.
00407  Example :
00408  Returns :
00409  Args    :
00410 
00411 
00412 =cut
00413 
00414 sub validate {
00415     my ($self) = @_;
00416 
00417     $self->vthrow("Seqname not defined in feature")     unless defined($self->seqname);
00418     $self->vthrow("start not defined in feature")       unless defined($self->start);
00419     $self->vthrow("end not defined in feature")         unless defined($self->end);
00420     $self->vthrow("strand not defined in feature")      unless defined($self->strand);
00421     $self->vthrow("score not defined in feature")       unless defined($self->score);
00422     $self->vthrow("analysis not defined in feature")    unless defined($self->analysis);
00423 
00424     if ($self->end < $self->start) {
00425       $self->vthrow("End coordinate < start coordinate");
00426     }
00427 
00428 }
00429 
00430 
00431 
00432 sub vthrow {
00433     my ($self,$message) = @_;
00434 
00435     print(STDERR "Error validating feature [$message]\n");
00436     print(STDERR "   Seqname     : [" . $self->{_seqname} . "]\n");
00437     print(STDERR "   Start       : [" . $self->{_gsf_start} . "]\n");
00438     print(STDERR "   End         : [" . $self->{_gsf_end} . "]\n");
00439     print(STDERR "   Strand      : [" .
00440         ((defined ($self->{_gsf_strand})) ? $self->{_gsf_strand} : "undefined") . "]\n");
00441 
00442     print(STDERR "   Score       : [" . $self->{_gsf_score} . "]\n");
00443 
00444     print(STDERR "   Analysis    : [" . $self->{_analysis}->dbID . "]\n");
00445 
00446     $self->throw("Invalid feature - see dump on STDERR");
00447 }
00448 
00449 
00450 =head2 validate_prot_feature
00451 
00452  Title   : validate_prot_feature
00453  Usage   :
00454  Function:
00455  Example :
00456  Returns :
00457  Args    :
00458 
00459 
00460 =cut
00461 
00462 # Shouldn't this go as "validate" into Pro_SeqFeature?
00463 sub validate_prot_feature{
00464     my ($self,$num) = @_;
00465     $self->throw("Seqname not defined in feature")     unless defined($self->seqname);
00466     $self->throw("start not defined in feature")       unless defined($self->start);
00467     $self->throw("end not defined in feature")         unless defined($self->end);
00468     if ($num == 1) {
00469         $self->throw("score not defined in feature")       unless defined($self->score);
00470         $self->throw("percent_id not defined in feature") unless defined($self->percent_id);
00471         $self->throw("evalue not defined in feature") unless defined($self->p_value);
00472     }
00473     $self->throw("analysis not defined in feature")    unless defined($self->analysis);
00474 }
00475 
00476 
00477 
00478 # These methods are specified in the SeqFeatureI interface but we don't want
00479 # people to store data in them.  These are just here in order to keep
00480 # existing code working
00481 
00482 
00483 =head2 has_tag
00484 
00485  Title   : has_tag
00486  Usage   : $value = $self->has_tag('some_tag')
00487  Function: Returns the value of the tag (undef if
00488            none)
00489  Returns :
00490  Args    :
00491 
00492 
00493 =cut
00494 
00495 sub has_tag{
00496    my ($self,$tag) = (shift, shift);
00497 
00498    return exists $self->{'_gsf_tag_hash'}->{$tag};
00499 }
00500 
00501 =head2 add_tag_value
00502 
00503  Title   : add_tag_value
00504  Usage   : $self->add_tag_value('note',"this is a note");
00505  Returns : nothing
00506  Args    : tag (string) and value (any scalar)
00507 
00508 
00509 =cut
00510 
00511 sub add_tag_value{
00512    my ($self,$tag,$value) = @_;
00513 
00514    if( !defined $self->{'_gsf_tag_hash'}->{$tag} ) {
00515        $self->{'_gsf_tag_hash'}->{$tag} = [];
00516    }
00517 
00518    push(@{$self->{'_gsf_tag_hash'}->{$tag}},$value);
00519 }
00520 
00521 =head2 each_tag_value
00522 
00523  Title   : each_tag_value
00524  Usage   :
00525  Function:
00526  Example :
00527  Returns :
00528  Args    :
00529 
00530 
00531 =cut
00532 
00533 sub each_tag_value {
00534    my ($self,$tag) = @_;
00535    if( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
00536        $self->throw("asking for tag value that does not exist $tag");
00537    }
00538 
00539    return @{$self->{'_gsf_tag_hash'}->{$tag}};
00540 }
00541 
00542 
00543 =head2 all_tags
00544 
00545  Title   : all_tags
00546  Usage   : @tags = $feat->all_tags()
00547  Function: gives all tags for this feature
00548  Returns : an array of strings
00549  Args    : none
00550 
00551 
00552 =cut
00553 
00554 sub all_tags{
00555    my ($self,@args) = @_;
00556 
00557    return keys %{$self->{'_gsf_tag_hash'}};
00558 }
00559 
00560 
00561 
00562 =head2 seqname
00563 
00564   Arg [1]    : string $seqname
00565   Example    : $seqname = $self->seqname();
00566   Description: Obtains the seqname of this features sequence.  This is set
00567                automatically when a sequence with a name is attached, or may
00568                be set manually.
00569   Returntype : string
00570   Exceptions : none
00571   Caller     : general, attach_seq
00572 
00573 =cut
00574 
00575 sub seqname{
00576    my ($self,$seqname) = @_;
00577 
00578    my $seq = $self->contig();
00579 
00580    if(defined $seqname) {
00581      $self->{_seqname} = $seqname;
00582    } else {
00583      if($seq && ref $seq && $seq->can('name')) {
00584        $self->{_seqname} = $seq->name();
00585      } 
00586    }
00587 
00588    return $self->{_seqname};
00589 }
00590 
00591 
00592 =head2 attach_seq
00593 
00594  Title   : attach_seq
00595  Usage   : $sf->attach_seq($seq)
00596  Function: Attaches a Bio::PrimarySeqI object to this feature. This
00597            Bio::PrimarySeqI object is for the *entire* sequence: ie
00598            from 1 to 10000
00599  Example :
00600  Returns :
00601  Args    :
00602 
00603 
00604 =cut
00605 
00606 sub attach_seq{
00607    my ($self, $seq) = @_;
00608 
00609    $self->contig($seq);
00610 }
00611 
00612 =head2 seq
00613 
00614  Example : $tseq = $sf->seq()
00615  Function: returns the sequence (if any ) for this feature truncated to the range spanning the feature
00616  Returns : a Bio::PrimarySeq object (I reckon)
00617 
00618 =cut
00619 
00620 sub seq{
00621    my ($self,$arg) = @_;
00622 
00623    if( defined $arg ) {
00624        $self->throw("Calling SeqFeature::Generic->seq with an argument. " .
00625             "You probably want attach_seq");
00626    }
00627 
00628    if( ! exists $self->{'_gsf_seq'} ) {
00629        return undef;
00630    }
00631 
00632    # assumming our seq object is sensible, it should not have to yank
00633    # the entire sequence out here.
00634 
00635    my $seq = $self->{'_gsf_seq'}->trunc($self->start(),$self->end());
00636 
00637 
00638    if( $self->strand == -1 ) {
00639 
00640        # ok. this does not work well (?)
00641        #print STDERR "Before revcom", $seq->str, "\n";
00642        $seq = $seq->revcom;
00643        #print STDERR "After  revcom", $seq->str, "\n";
00644    }
00645 
00646    return $seq;
00647 }
00648 
00649 =head2 entire_seq
00650 
00651  Title   : entire_seq
00652  Usage   : $whole_seq = $sf->entire_seq()
00653  Function: gives the entire sequence that this seqfeature is attached to
00654  Example :
00655  Returns :
00656  Args    :
00657 
00658 
00659 =cut
00660 
00661 sub entire_seq{
00662    my ($self) = @_;
00663 
00664    return $self->contig;
00665 }
00666 
00667 
00668 =head2 sub_SeqFeature
00669 
00670  Title   : sub_SeqFeature
00671  Usage   : @feats = $feat->sub_SeqFeature();
00672  Function: Returns an array of sub Sequence Features
00673  Returns : An array
00674  Args    : none
00675 
00676 
00677 =cut
00678 
00679 sub sub_SeqFeature {
00680   my ($self) = @_;
00681 
00682   if ( $self->{'_gsf_sub_array'} ) {
00683     return @{ $self->{'_gsf_sub_array'} };
00684   } else {
00685     return ();
00686   }
00687 }
00688 
00689 =head2 add_sub_SeqFeature
00690 
00691  Title   : add_sub_SeqFeature
00692  Usage   : $feat->add_sub_SeqFeature($subfeat);
00693            $feat->add_sub_SeqFeature($subfeat,'EXPAND')
00694  Function: adds a SeqFeature into the subSeqFeature array.
00695            with no 'EXPAND' qualifer, subfeat will be tested
00696            as to whether it lies inside the parent, and throw
00697            an exception if not.
00698 
00699            If EXPAND is used, the parents start/end/strand will
00700            be adjusted so that it grows to accommodate the new
00701            subFeature
00702  Returns : nothing
00703  Args    : An object which has the SeqFeatureI interface
00704 
00705 
00706 =cut
00707 
00708 sub add_sub_SeqFeature{
00709    my ($self,$feat,$expand) = @_;
00710 
00711    if( !$feat->isa('Bio::SeqFeatureI') ) {
00712        $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
00713    }
00714 
00715    if( $expand eq 'EXPAND' ) {
00716        # if this doesn't have start/end set - forget it!
00717        if( !defined $self->start && !defined $self->end ) {
00718            $self->start($feat->start());
00719            $self->end($feat->end());
00720            $self->strand($feat->strand);
00721        } else {
00722            my ($start,$end);
00723            if( $feat->start < $self->start ) {
00724                $start = $feat->start;
00725            }
00726 
00727            if( $feat->end > $self->end ) {
00728                $end = $feat->end;
00729            }
00730 
00731            $self->start($start);
00732            $self->end($end);
00733 
00734        }
00735    } else {
00736      if( !defined($feat->start()) || !defined($feat->end()) ||
00737          !defined($self->start())  || !defined($self->end())) {
00738        $self->throw( "This SeqFeature and the sub_SeqFeature must define".
00739                      " start and end.");
00740      }
00741      if($feat->start() > $feat->end() || $self->start() > $self->end()) {
00742        $self->throw("This SeqFeature and the sub_SeqFeature must have " .
00743                     "start that is less than or equal to end.");
00744      }
00745      if($feat->start() < $self->start() || $feat->end() > $self->end() ) {
00746        $self->throw("$feat is not contained within parent feature, " .
00747                     "and expansion is not valid");
00748      }
00749    }
00750 
00751    push(@{$self->{'_gsf_sub_array'}},$feat);
00752 
00753 }
00754 
00755 =head2 flush_sub_SeqFeature
00756 
00757  Title   : flush_sub_SeqFeature
00758  Usage   : $sf->flush_sub_SeqFeature
00759  Function: Removes all sub SeqFeature
00760            (if you want to remove only a subset, take
00761             an array of them all, flush them, and add
00762             back only the guys you want)
00763  Example :
00764  Returns : none
00765  Args    : none
00766 
00767 
00768 =cut
00769 
00770 sub flush_sub_SeqFeature {
00771    my ($self) = @_;
00772 
00773    $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
00774 }
00775 
00776 
00777 sub id {
00778     my ($self,$value) = @_;
00779 
00780     if (defined($value)) {
00781         $self->{_id} = $value;
00782     }
00783 
00784     return $self->{_id};
00785 
00786 }
00787 
00788 =head2 percent_id
00789 
00790  Title   : percent_id
00791  Usage   : $pid = $feat->percent_id()
00792            $feat->percent_id($pid)
00793  Function: get/set on percentage identity information
00794  Returns : float
00795  Args    : none if get, the new value if set
00796 
00797 =cut
00798 
00799 sub percent_id {
00800     my ($self,$value) = @_;
00801 
00802     if (defined($value))
00803     {
00804             $self->{_percent_id} = $value;
00805     }
00806 
00807     return $self->{_percent_id};
00808 }
00809 
00810 =head2 p_value
00811 
00812  Title   : p_value
00813  Usage   : $p_val = $feat->p_value()
00814            $feat->p_value($p_val)
00815  Function: get/set on p value information
00816  Returns : float
00817  Args    : none if get, the new value if set
00818 
00819 =cut
00820 
00821 sub p_value {
00822     my ($self,$value) = @_;
00823 
00824     if (defined($value))
00825     {
00826             $self->{_p_value} = $value;
00827     }
00828 
00829     return $self->{_p_value};
00830 }
00831 
00832 =head2 phase
00833 
00834  Title   : phase
00835  Usage   : $phase = $feat->phase()
00836            $feat->phase($phase)
00837  Function: get/set on start phase of predicted exon feature
00838  Returns : [0,1,2]
00839  Args    : none if get, 0,1 or 2 if set.
00840 
00841 =cut
00842 
00843 sub phase {
00844     my ($self, $value) = @_;
00845 
00846     if (defined($value) )
00847     {
00848         $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
00849             $self->{_phase} = $value;
00850     }
00851 
00852     return $self->{_phase};
00853 }
00854 
00855 =head2 end_phase
00856 
00857  Title   : end_phase
00858  Usage   : $end_phase = $feat->end_phase()
00859            $feat->end_phase($end_phase)
00860  Function: returns end_phase based on phase and length of feature
00861  Returns : [0,1,2]
00862  Args    : none if get, 0,1 or 2 if set.
00863 
00864 =cut
00865 
00866 sub end_phase {
00867    my ($self, $value) = @_;
00868 
00869     if (defined($value))
00870     {
00871             $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
00872             $self->{_end_phase} = $value;
00873     }
00874 
00875     return $self->{_end_phase};
00876 }
00877 
00878 
00879 sub gffstring {
00880    my ($self) = @_;
00881 
00882    my $str;
00883 
00884    my $strand = "+";
00885    
00886    if ((defined $self->strand)&&($self->strand == -1)) {
00887      $strand = "-";
00888    }
00889    
00890    $str .= (defined $self->seqname)     ?   $self->seqname."\t"      :  "\t";
00891    $str .= (defined $self->source_tag)  ?   $self->source_tag."\t"   :  "\t";
00892    $str .= (defined $self->primary_tag) ?   $self->primary_tag."\t"  :  "\t";
00893    $str .= (defined $self->start)       ?   $self->start."\t"        :  "\t";
00894    $str .= (defined $self->end)         ?   $self->end."\t"          :  "\t";
00895    $str .= (defined $self->score)       ?   $self->score."\t"        :  "\t";
00896    $str .= (defined $self->strand)      ?   $strand."\t"             :  ".\t";
00897    $str .= (defined $self->phase)       ?   $self->phase."\t"        :  ".\t";
00898    eval{
00899      $str .= (defined $self->end_phase)   ?   $self->end_phase."\t"        :  ".\t";
00900    };
00901 
00902    return $str;
00903 }
00904 
00905 
00906 =head2 external_db
00907 
00908  Title   : external_db
00909  Usage   : $pid = $feat->external_db()
00910            $feat->external_db($dbid)
00911  Function: get/set for an external db accession number (e.g.: Interpro)
00912  Returns :
00913  Args    : none if get, the new value if set
00914 
00915 =cut
00916 
00917 sub external_db {
00918     my ($self,$value) = @_;
00919 
00920     if (defined($value))
00921     {
00922             $self->{'_external_db'} = $value;
00923     }
00924 
00925     return $self->{'_external_db'};
00926 }
00927 
00928 
00929 
00930 =head2 contig
00931 
00932   Arg [1]    : Bio::PrimarySeqI $seq
00933   Example    : $seq = $self->contig;
00934   Description: Accessor to attach/retrieve a sequence to/from a feature
00935   Returntype : Bio::PrimarySeqI
00936   Exceptions : none
00937   Caller     : general
00938 
00939 =cut
00940 
00941 sub contig {
00942   my ($self, $arg) = @_;
00943 
00944   if ($arg) {
00945     unless (defined $arg && ref $arg && $arg->isa("Bio::PrimarySeqI")) {
00946       $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures");
00947     }
00948     
00949     $self->{'_gsf_seq'} = $arg;
00950     
00951     # attach to sub features if they want it
00952     
00953     foreach my $sf ($self->sub_SeqFeature) {
00954       if ($sf->can("attach_seq")) {
00955     $sf->attach_seq($arg);
00956       }
00957     }
00958   }
00959   #print STDERR "contig is ".$self->{'_gsf_seq'}." with name ".$self->{'_gsf_seq'}->name."\n" unless(!$self->{'_gsf_seq'});
00960 #  my ($p, $f, $l) = caller;
00961 #  print STDERR "Caller = ".$f." ".$l."\n";
00962   return $self->{'_gsf_seq'};
00963 }
00964 
00965 
00966 
00967 sub is_splittable {
00968    my ($self, $arg) = @_;
00969 
00970    if (defined $arg) {
00971        $self->{'_is_splittable'} = $arg;
00972    }
00973    return $self->{'_is_splittable'};
00974 }
00975 
00976 
00977 sub transform {
00978   my ($self, $slice) = @_;
00979 
00980   unless (defined $slice) {
00981 
00982     if ((defined $self->contig) &&
00983       ($self->contig->isa("Bio::EnsEMBL::RawContig"))) {
00984 
00985       # we are already in rawcontig coords, nothing needs to be done
00986       return $self;
00987 
00988     }
00989     else {
00990       # transform to raw_contig coords from Slice coords
00991       return $self->_transform_to_RawContig();
00992     }
00993   }
00994 
00995   if (defined $self->contig) {
00996 
00997     if ($self->contig->isa("Bio::EnsEMBL::RawContig"))  {
00998 
00999       # transform to slice coords from raw contig coords
01000       return $self->_transform_to_Slice($slice);
01001     }
01002     elsif ($self->contig->isa( "Bio::EnsEMBL::Slice" ) or $self->contig->isa( "Bio::EnsEMBL::LRGSlice" )) {
01003 
01004       # transform to slice coords from other slice coords
01005       return $self->_transform_between_Slices($slice);
01006     }
01007     else {
01008 
01009       # Unknown contig type
01010       $self->throw("Cannot transform unknown contig type @{[$self->contig]}");
01011     }
01012   }
01013   else {
01014 
01015     #Can't convert to slice coords without a contig to work with
01016     return $self->throw("Object's contig is not defined - cannot transform");
01017   }
01018 
01019 }
01020 
01021 
01022 sub _transform_to_Slice {
01023   my ($self, $slice) = @_;
01024 
01025   $self->throw("can't transform coordinates of $self without a contig defined")
01026     unless $self->contig;
01027 
01028   unless($self->contig->adaptor) {
01029     $self->throw("cannot transform coordinates of $self without adaptor " .
01030          "attached to contig");
01031   }
01032 
01033   my $dbh = $self->contig->adaptor->db;
01034 
01035   my $mapper = 
01036     $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
01037   my $rca = $dbh->get_RawContigAdaptor;
01038 
01039   my @mapped = $mapper->map_coordinates_to_assembly(
01040     $self->contig->dbID,
01041     $self->start,
01042     $self->end,
01043     $self->strand
01044   );
01045 
01046   unless (@mapped) {
01047     $self->throw("couldn't map $self to Slice");
01048   }
01049 
01050   unless (@mapped == 1) {
01051     $self->throw("$self should only map to one chromosome - " .
01052          "something bad has happened ...");
01053   }
01054 
01055   if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
01056     $self->warn("feature lies on gap\n");
01057     return;
01058   }
01059 
01060   if( ! defined $slice->chr_name() ) {
01061     my $slice_adaptor = $slice->adaptor();
01062     %$slice = %{$slice_adaptor->fetch_by_chr_name( $mapped[0]->id() )};
01063   }
01064    
01065   # mapped coords are on chromosome - need to convert to slice
01066   if($slice->strand == 1) {
01067     $self->start  ($mapped[0]->start - $slice->chr_start + 1);
01068     $self->end    ($mapped[0]->end   - $slice->chr_start + 1);
01069     $self->strand ($mapped[0]->strand);
01070   } else {
01071     $self->start  ($slice->chr_end - $mapped[0]->end   + 1);
01072     $self->end    ($slice->chr_end - $mapped[0]->start + 1);
01073     $self->strand ($mapped[0]->strand * -1);
01074   }
01075 
01076   $self->seqname($mapped[0]->id);
01077 
01078   #set the contig to the slice
01079   $self->contig($slice);
01080 
01081   return $self;
01082 }
01083 
01084 
01085 sub _transform_between_Slices {
01086   my ($self, $to_slice) = @_;
01087 
01088   my $from_slice = $self->contig;
01089 
01090   $self->throw("New contig [$to_slice] is not a Bio::EnsEMBL::Slice")
01091    unless ($to_slice->isa("Bio::EnsEMBL::Slice") or $to_slice->isa("Bio::EnsEMBL::LRGSlice") );
01092 
01093   if ((my $c1 = $from_slice->chr_name) ne (my $c2 = $to_slice->chr_name)) {
01094     $self->warn("Can't transform between chromosomes: $c1 and $c2");
01095     return;
01096   }
01097 
01098   my($start, $end, $strand);
01099 
01100   #first convert to assembly coords
01101   if($from_slice->strand == 1) {
01102     $start  = $from_slice->chr_start + $self->start - 1;
01103     $end    = $from_slice->chr_start + $self->end   - 1;
01104     $strand = $self->strand;
01105   } else {
01106     $start  = $from_slice->chr_end - $self->end   + 1;
01107     $end    = $from_slice->chr_end - $self->start + 1;
01108     $strand = $self->strand;
01109   }
01110 
01111   #now convert to the other slice's coords 
01112   if($to_slice->strand == 1) {
01113     $self->start ($start - $to_slice->chr_start + 1); 
01114     $self->end   ($end   - $to_slice->chr_start + 1); 
01115     $self->strand($strand);
01116   } else {
01117     $self->start ($to_slice->chr_end - $end   + 1);
01118     $self->end   ($to_slice->chr_end - $start + 1);
01119     $self->strand($strand * -1);
01120   }
01121 
01122   $self->contig($to_slice);
01123 
01124   return $self;
01125 }
01126 
01127 
01128 sub _transform_to_RawContig {
01129   my($self) = @_;
01130 
01131   #print STDERR "transforming ".$self." to raw contig coords\n";
01132   $self->throw("can't transform coordinates of $self without a contig defined")
01133    unless $self->contig;
01134 
01135   my $slice = $self->contig;
01136 
01137   unless($slice->adaptor) {
01138     $self->throw("can't transform coordinates of $self without an adaptor " .
01139          "attached to the feature's slice");
01140   }
01141 
01142   my $dbh = $slice->adaptor->db;
01143 
01144   my $mapper = 
01145     $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
01146   my $rca = $dbh->get_RawContigAdaptor;
01147 
01148   #first convert the features coordinates to assembly coordinates
01149   my($start, $end, $strand);
01150   if($slice->strand == 1) {
01151     $start  = $slice->chr_start + $self->start - 1;
01152     $end    = $slice->chr_start + $self->end   - 1;
01153     $strand = $self->strand;
01154   } else {
01155     $start = $slice->chr_end - $self->end   + 1;
01156     $end   = $slice->chr_end - $self->start + 1;
01157     $strand = $self->strand * -1;
01158   }
01159 
01160   #convert the assembly coordinates to RawContig coordinates
01161   my @mapped = $mapper->map_coordinates_to_rawcontig(
01162     $slice->chr_name,
01163     $start,
01164     $end,
01165     $strand
01166   );
01167 
01168   unless (@mapped) {
01169     $self->throw("couldn't map $self");
01170     return $self;
01171   }
01172 
01173   if (@mapped == 1) {
01174 
01175     if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
01176       $self->warn("feature lies on gap\n");
01177       return;
01178     }
01179 
01180     my $rc = $rca->fetch_by_dbID($mapped[0]->id);
01181 
01182     $self->start     ($mapped[0]->start);
01183     $self->end       ($mapped[0]->end);
01184     $self->strand    ($mapped[0]->strand);
01185     $self->seqname   ($mapped[0]->id);
01186     #print STDERR "setting contig to be ".$mapped[0]->id."\n";
01187     $self->contig($rca->fetch_by_dbID($mapped[0]->id));
01188 
01189     return $self;
01190   }
01191   else {
01192 
01193     # more than one object returned from mapper
01194     # possibly more than one RawContig in region
01195 
01196     my (@gaps, @coords);
01197 
01198     foreach my $m (@mapped) {
01199 
01200     if ($m->isa("Bio::EnsEMBL::Mapper::Gap")) {
01201         push @gaps, $m;
01202     }
01203     elsif ($m->isa("Bio::EnsEMBL::Mapper::Coordinate")) {
01204         push @coords, $m;
01205     }
01206     }
01207 
01208     # case where only one RawContig maps
01209     if (@coords == 1) {
01210 
01211         $self->start  ($coords[0]->start);
01212         $self->end    ($coords[0]->end);
01213         $self->strand ($coords[0]->strand);
01214         $self->seqname($coords[0]->id);
01215     #print STDERR "2 setting contig to be ".$coords[0]->id."\n";
01216         $self->contig ($rca->fetch_by_dbID($coords[0]->id));
01217 
01218     $self->warn("Feature [$self] truncated as lies partially on a gap");
01219         return $self;
01220     }
01221 
01222     unless ($self->is_splittable) {
01223         $self->warn("Feature spans >1 raw contig - can't split\n");
01224         return;
01225     }
01226 
01227     my @out;
01228     my $obj = ref $self;
01229 
01230     SPLIT: foreach my $map (@mapped) {
01231 
01232       if ($map->isa("Bio::EnsEMBL::Mapper::Gap")) {
01233     $self->warn("piece of evidence lies on gap\n");
01234     next SPLIT;
01235       }
01236 
01237       my $feat = $obj->new;
01238 
01239       $feat->start  ($map->start);
01240       $feat->end    ($map->end);
01241       $feat->strand ($map->strand);
01242       #print STDERR "3 setting contig to be ".$mapped[0]->id."\n";
01243       $feat->contig ($rca->fetch_by_dbID($map->id));
01244       $feat->adaptor($self->adaptor) if $self->adaptor();
01245       $feat->display_label($self->display_label) if($self->can('display_label'));
01246       $feat->analysis($self->analysis);
01247       push @out, $feat;
01248     }
01249     
01250     return @out;
01251   }
01252 }
01253 
01254 
01255 1;