Archive Ensembl HomeArchive Ensembl Home
Member.pm
Go to the documentation of this file.
00001 package Bio::EnsEMBL::Compara::Member;
00002 
00003 use strict;
00004 use Bio::Seq;
00005 use Bio::EnsEMBL::Utils::Argument;
00006 use Bio::EnsEMBL::Utils::Exception;
00007 use Bio::EnsEMBL::Gene;
00008 use Bio::EnsEMBL::Compara::GenomeDB;
00009 
00010 
00011 =head2 new (CONSTRUCTOR)
00012 
00013     Arg [-DBID] : (opt) 
00014         : int $dbID (the database internal ID for this object)
00015     Arg [-ADAPTOR] 
00016         : Bio::EnsEMBL::Compara::DBSQL::Member $adaptor
00017                 (the adaptor for connecting to the database)
00018     Arg [-DESCRIPTION] (opt) 
00019          : string $description
00020     Arg [-SOURCE_NAME] (opt) 
00021          : string $source_name 
00022          (e.g., "ENSEMBLGENE", "ENSEMBLPEP", "Uniprot/SWISSPROT", "Uniprot/SPTREMBL")
00023     Arg [-TAXON_ID] (opt)
00024          : int $taxon_id
00025          (NCBI taxonomy id for the member)
00026     Arg [-GENOME_DB_ID] (opt)
00027         : int $genome_db_id
00028         (the $genome_db->dbID for a species in the database)
00029     Arg [-SEQUENCE_ID] (opt)
00030         : int $sequence_id
00031         (the $sequence_id for the sequence table in the database)
00032     Example :
00033     my $member = new Bio::EnsEMBL::Compara::Member;
00034        Description: Creates a new Member object
00035        Returntype : Bio::EnsEMBL::Compara::Member
00036        Exceptions : none
00037        Caller     : general
00038        Status     : Stable
00039 
00040 =cut
00041 
00042 sub new {
00043   my ($class, @args) = @_;
00044 
00045   my $self = bless {}, $class;
00046   
00047   if (scalar @args) {
00048     #do this explicitly.
00049     my ($dbid, $stable_id, $description, $source_name, $adaptor, $taxon_id, $genome_db_id, $sequence_id) = rearrange([qw(DBID STABLE_ID DESCRIPTION SOURCE_NAME ADAPTOR TAXON_ID GENOME_DB_ID SEQUENCE_ID)], @args);
00050 
00051     $dbid && $self->dbID($dbid);
00052     $stable_id && $self->stable_id($stable_id);
00053     $description && $self->description($description);
00054     $source_name && $self->source_name($source_name);
00055     $adaptor && $self->adaptor($adaptor);
00056     $taxon_id && $self->taxon_id($taxon_id);
00057     $genome_db_id && $self->genome_db_id($genome_db_id);
00058     $sequence_id && $self->sequence_id($sequence_id);
00059   }
00060 
00061   return $self;
00062 }
00063 
00064 
00065 =head2 copy
00066 
00067   Arg [1]    : object $parent_object (optional)
00068   Example    :
00069   Description: copies the object, optionally by topping up a given structure (to support multiple inheritance)
00070   Returntype :
00071   Exceptions :
00072   Caller     :
00073 
00074 =cut
00075 
00076 sub copy {
00077   my $self = shift;
00078   
00079   my $mycopy = @_ ? shift : {};
00080   bless $mycopy, 'Bio::EnsEMBL::Compara::Member';
00081   
00082   $mycopy->dbID($self->dbID);
00083   $mycopy->stable_id($self->stable_id);
00084   $mycopy->version($self->version);
00085   $mycopy->description($self->description);
00086   $mycopy->source_name($self->source_name);
00087   #$mycopy->adaptor($self->adaptor);
00088   $mycopy->chr_name($self->chr_name);
00089   $mycopy->chr_start($self->chr_start);
00090   $mycopy->chr_end($self->chr_end);
00091   $mycopy->chr_strand($self->chr_strand);
00092   $mycopy->taxon_id($self->taxon_id);
00093   $mycopy->genome_db_id($self->genome_db_id);
00094   $mycopy->sequence_id($self->sequence_id);
00095   $mycopy->gene_member_id($self->gene_member_id);
00096   $mycopy->display_label($self->display_label);
00097   
00098   return $mycopy;
00099 }
00100 
00101 
00102 =head2 new_fast
00103 
00104   Arg [1]    : hash reference $hashref
00105   Example    : none
00106   Description: This is an ultra fast constructor which requires knowledge of
00107                the objects internals to be used.
00108   Returntype :
00109   Exceptions : none
00110   Caller     :
00111 
00112 =cut
00113 
00114 sub new_fast {
00115   my ($class, $hashref) = @_;
00116 
00117   return bless $hashref, $class;
00118 }
00119 
00120 =head2 new_from_gene
00121 
00122   Args       : Requires both an Bio::Ensembl:Gene object and a
00123              : Bio::Ensembl:Compara:GenomeDB object
00124   Example    : $member = Bio::EnsEMBL::Compara::Member->new_from_gene(
00125                 -gene   => $gene,
00126                 -genome_db => $genome_db);
00127   Description: contructor method which takes an Ensembl::Gene object
00128                and Compara:GenomeDB object and creates a new Member object
00129                translating from the Gene object
00130   Returntype : Bio::Ensembl::Compara::Member
00131   Exceptions :
00132   Caller     :
00133 
00134 =cut
00135 
00136 sub new_from_gene {
00137   my ($class, @args) = @_;
00138   my $self = $class->new(@args);
00139 
00140   if (scalar @args) {
00141 
00142     my ($gene, $genome_db) = rearrange([qw(GENE GENOME_DB)], @args);
00143 
00144     unless(defined($gene) and $gene->isa('Bio::EnsEMBL::Gene')) {
00145       throw(
00146       "gene arg must be a [Bio::EnsEMBL::Gene] ".
00147       "not a [$gene]");
00148     }
00149     unless(defined($genome_db) and $genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
00150       throw(
00151       "genome_db arg must be a [Bio::EnsEMBL::Compara::GenomeDB] ".
00152       "not a [$genome_db]");
00153     }
00154     unless (defined $gene->stable_id) {
00155       throw("COREDB error: does not contain gene_stable_id for gene_id ". $gene->dbID."\n");
00156     }
00157 
00158     $self->stable_id($gene->stable_id);
00159     $self->taxon_id($genome_db->taxon_id);
00160     $self->description($gene->description);
00161     $self->genome_db_id($genome_db->dbID);
00162     $self->chr_name($gene->seq_region_name);
00163     $self->chr_start($gene->seq_region_start);
00164     $self->chr_end($gene->seq_region_end);
00165     $self->chr_strand($gene->seq_region_strand);
00166     $self->source_name("ENSEMBLGENE");
00167     $self->version($gene->version);
00168   }
00169   return $self;
00170 }
00171 
00172 
00173 =head2 new_from_transcript
00174 
00175   Arg[1]     : Bio::Ensembl:Transcript object
00176   Arg[2]     : Bio::Ensembl:Compara:GenomeDB object
00177   Arg[3]     : string where value='translate' causes transcript object to translate
00178                to a peptide
00179   Example    : $member = Bio::EnsEMBL::Compara::Member->new_from_transcript(
00180                   $transcript, $genome_db,
00181                 -translate);
00182   Description: contructor method which takes an Ensembl::Gene object
00183                and Compara:GenomeDB object and creates a new Member object
00184                translating from the Gene object
00185   Returntype : Bio::Ensembl::Compara::Member
00186   Exceptions :
00187   Caller     :
00188 
00189 =cut
00190 
00191 sub new_from_transcript {
00192   my ($class, @args) = @_;
00193   my $self = $class->new(@args);
00194   my $peptideBioSeq;
00195   my $seq_string;
00196 
00197   my ($transcript, $genome_db, $translate, $description) = rearrange([qw(TRANSCRIPT GENOME_DB TRANSLATE DESCRIPTION)], @args);
00198   #my ($transcript, $genome_db, $translate) = @args;
00199 
00200   unless(defined($transcript) and $transcript->isa('Bio::EnsEMBL::Transcript')) {
00201     throw(
00202     "transcript arg must be a [Bio::EnsEMBL::Transcript]".
00203     "not a [$transcript]");
00204   }
00205   unless(defined($genome_db) and $genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
00206     throw(
00207     "genome_db arg must be a [Bio::EnsEMBL::Compara::GenomeDB] ".
00208     "not a [$genome_db]");
00209   }
00210   $self->taxon_id($genome_db->taxon_id);
00211   if(defined($description)) { $self->description($description); }
00212   else { $self->description("NULL"); }
00213   $self->genome_db_id($genome_db->dbID);
00214   $self->chr_name($transcript->seq_region_name);
00215   $self->chr_start($transcript->coding_region_start);
00216   $self->chr_end($transcript->coding_region_end);
00217   $self->chr_strand($transcript->seq_region_strand);
00218   $self->version($transcript->translation->version) if ($translate eq 'yes');
00219 
00220   if(($translate eq 'translate') or ($translate eq 'yes')) {
00221     if(not defined($transcript->translation)) {
00222       throw("request to translate a transcript without a defined translation",
00223             $transcript->stable_id);
00224     }
00225     unless (defined $transcript->translation->stable_id) {
00226       throw("COREDB error: does not contain translation stable id for translation_id ".$transcript->translation->dbID."\n");
00227     }
00228     
00229     $self->stable_id($transcript->translation->stable_id);
00230     $self->source_name("ENSEMBLPEP");
00231     
00232     unless ($peptideBioSeq = $transcript->translate) {
00233       throw("COREDB error: unable to get a BioSeq translation from ". $transcript->stable_id);
00234     }
00235     eval {
00236       $seq_string = $peptideBioSeq->seq;
00237     };
00238     throw "COREDB error: can't get seq from peptideBioSeq" if $@;
00239     # OR
00240     #$seq_string = $transcript->translation->seq;
00241     
00242     if ($seq_string =~ /^X+$/) {
00243       warn("X+ in sequence from translation " . $transcript->translation->stable_id."\n");
00244     }
00245     elsif (length($seq_string) == 0) {
00246       warn("zero length sequence from translation " . $transcript->translation->stable_id."\n");
00247     }
00248     else {
00249       #$seq_string =~ s/(.{72})/$1\n/g;
00250       $self->sequence($seq_string);
00251     }
00252   } elsif ($translate eq 'ncrna') {
00253     unless (defined $transcript->stable_id) {
00254       throw("COREDB error: does not contain transcript stable id for transcript_id ".$transcript->dbID."\n");
00255     }
00256     $self->stable_id($transcript->stable_id);
00257     $self->source_name("ENSEMBLTRANS");
00258 
00259     unless ($seq_string = $transcript->spliced_seq) {
00260       throw("COREDB error: unable to get a BioSeq spliced_seq from ". $transcript->stable_id);
00261     }
00262     if (length($seq_string) == 0) {
00263       warn("zero length sequence from transcript " . $transcript->stable_id."\n");
00264     }
00265     $self->sequence($seq_string);
00266   }
00267   
00268   #print("Member->new_from_transcript\n");
00269   #print("  source_name = '" . $self->source_name . "'\n");
00270   #print("  stable_id = '" . $self->stable_id . "'\n");
00271   #print("  taxon_id = '" . $self->taxon_id . "'\n");
00272   #print("  chr_name = '" . $self->chr_name . "'\n");
00273   return $self;
00274 }
00275 
00276 
00277 =head2 member_id
00278 
00279   Arg [1]    : int $member_id (optional)
00280   Example    :
00281   Description:
00282   Returntype :
00283   Exceptions :
00284   Caller     :
00285 
00286 =cut
00287 
00288 sub member_id {
00289   my $self = shift;
00290   return $self->dbID(@_);
00291 }
00292 
00293 
00294 =head2 dbID
00295 
00296   Arg [1]    : int $dbID (optional)
00297   Example    :
00298   Description:
00299   Returntype :
00300   Exceptions :
00301   Caller     :
00302 
00303 =cut
00304 
00305 sub dbID {
00306   my $self = shift;
00307   $self->{'_dbID'} = shift if(@_);
00308   return $self->{'_dbID'};
00309 }
00310 
00311 =head2 stable_id
00312 
00313   Arg [1]    : string $stable_id (optional)
00314   Example    :
00315   Description:
00316   Returntype :
00317   Exceptions :
00318   Caller     :
00319 
00320 =cut
00321 
00322 sub stable_id {
00323   my $self = shift;
00324   $self->{'_stable_id'} = shift if(@_);
00325   return $self->{'_stable_id'};
00326 }
00327 
00328 =head2 display_label
00329 
00330   Arg [1]    : string $display_label (optional)
00331   Example    :
00332   Description:
00333   Returntype :
00334   Exceptions :
00335   Caller     :
00336 
00337 =cut
00338 
00339 sub display_label {
00340   my $self = shift;
00341   $self->{'_display_label'} = shift if(@_);
00342   return $self->{'_display_label'};
00343 }
00344 
00345 =head2 version
00346 
00347   Arg [1]    :
00348   Example    :
00349   Description:
00350   Returntype :
00351   Exceptions :
00352   Caller     :
00353 
00354 =cut
00355 
00356 sub version {
00357   my $self = shift;
00358   $self->{'_version'} = shift if(@_);
00359   $self->{'_version'} = 0 unless(defined($self->{'_version'}));
00360   return $self->{'_version'};
00361 }
00362 
00363 =head2 description
00364 
00365   Arg [1]    : string $description (optional)
00366   Example    :
00367   Description:
00368   Returntype : string
00369   Exceptions :
00370   Caller     :
00371 
00372 =cut
00373 
00374 sub description {
00375   my $self = shift;
00376   $self->{'_description'} = shift if(@_);
00377   return $self->{'_description'};
00378 }
00379 
00380 =head2 source_name
00381 
00382 =cut
00383 
00384 sub source_name {
00385   my $self = shift;
00386   $self->{'_source_name'} = shift if (@_);
00387   return $self->{'_source_name'};
00388 }
00389 
00390 =head2 adaptor
00391 
00392   Arg [1]    : string $adaptor (optional)
00393                corresponding to a perl module
00394   Example    :
00395   Description:
00396   Returntype :
00397   Exceptions :
00398   Caller     :
00399 
00400 =cut
00401 
00402 sub adaptor {
00403   my $self = shift;
00404   $self->{'_adaptor'} = shift if(@_);
00405   return $self->{'_adaptor'};
00406 }
00407 
00408 =head2 chr_name
00409 
00410 =cut
00411 
00412 sub chr_name {
00413   my $self = shift;
00414   $self->{'_chr_name'} = shift if (@_);
00415   return $self->{'_chr_name'};
00416 }
00417 
00418 =head2 chr_start
00419 
00420 =cut
00421 
00422 sub chr_start {
00423   my $self = shift;
00424   $self->{'_chr_start'} = shift if (@_);
00425   return $self->{'_chr_start'};
00426 }
00427 
00428 =head2 chr_end
00429 
00430 =cut
00431 
00432 sub chr_end {
00433   my $self = shift;
00434   $self->{'_chr_end'} = shift if (@_);
00435   return $self->{'_chr_end'};
00436 }
00437 
00438 =head2 chr_strand
00439 
00440   Arg [1]    : integer
00441   Description: Returns the strand of the member.  Defined strands are 1 or -1.
00442                0 is undefined strand.
00443   Returntype : 1,0,-1
00444   Exceptions : none
00445   Caller     : general
00446 
00447 =cut
00448 
00449 sub chr_strand {
00450   my $self = shift;
00451   $self->{'_chr_strand'} = shift if (@_);
00452   $self->{'_chr_strand'}='0' unless(defined($self->{'_chr_strand'}));
00453   return $self->{'_chr_strand'};
00454 }
00455 
00456 =head2 taxon_id
00457 
00458 =cut
00459 
00460 sub taxon_id {
00461     my $self = shift;
00462     $self->{'_taxon_id'} = shift if (@_);
00463     return $self->{'_taxon_id'};
00464 }
00465 
00466 =head2 taxon
00467 
00468 =cut
00469 
00470 sub taxon {
00471   my $self = shift;
00472 
00473   if (@_) {
00474     my $taxon = shift;
00475     unless ($taxon->isa('Bio::EnsEMBL::Compara::NCBITaxon')) {
00476       throw(
00477            "taxon arg must be a [Bio::EnsEMBL::Compara::NCBITaxon".
00478            "not a [$taxon]");
00479     }
00480     $self->{'_taxon'} = $taxon;
00481     $self->taxon_id($taxon->ncbi_taxid);
00482   } else {
00483     unless (defined $self->{'_taxon'}) {
00484       unless (defined $self->taxon_id) {
00485         throw("can't fetch Taxon without a taxon_id");
00486       }
00487       my $NCBITaxonAdaptor = $self->adaptor->db->get_NCBITaxonAdaptor;
00488       $self->{'_taxon'} = $NCBITaxonAdaptor->fetch_node_by_taxon_id($self->taxon_id);
00489     }
00490   }
00491 
00492   return $self->{'_taxon'};
00493 }
00494 
00495 =head2 genome_db_id
00496 
00497 =cut
00498 
00499 sub genome_db_id {
00500     my $self = shift;
00501     $self->{'_genome_db_id'} = shift if (@_);
00502     return $self->{'_genome_db_id'};
00503 }
00504 
00505 =head2 genome_db
00506 
00507 =cut
00508 
00509 sub genome_db {
00510   my $self = shift;
00511 
00512   if (@_) {
00513     my $genome_db = shift;
00514     unless ($genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
00515       throw(
00516            "arg must be a [Bio::EnsEMBL::Compara::GenomeDB".
00517            "not a [$genome_db]");
00518     }
00519     $self->{'_genome_db'} = $genome_db;
00520     $self->genome_db_id($genome_db->dbID);
00521   } else {
00522     unless (defined $self->{'_genome_db'}) {
00523       unless (defined $self->genome_db_id and defined $self->adaptor) {
00524         throw("can't fetch GenomeDB without an adaptor and genome_db_id");
00525       }
00526       my $GenomeDBAdaptor = $self->adaptor->db->get_GenomeDBAdaptor;
00527       $self->{'_genome_db'} = $GenomeDBAdaptor->fetch_by_dbID($self->genome_db_id);
00528     }
00529   }
00530 
00531   return $self->{'_genome_db'};
00532 }
00533 
00534 =head2 sequence
00535 
00536   Arg [1]    : string $sequence
00537   Example    : my $seq = $member->sequence;
00538   Description: Get/set the sequence string of this member
00539                Will lazy load by sequence_id if needed and able
00540   Returntype : string
00541   Exceptions : none
00542   Caller     : general
00543 
00544 =cut
00545 
00546 sub sequence {
00547   my $self = shift;
00548 
00549   if(@_) {
00550     $self->{'_seq_length'} = undef;
00551     $self->{'_sequence'} = shift;
00552     $self->{'_seq_length'} = length($self->{'_sequence'}) if(defined($self->{'_sequence'}));
00553     return $self->{'_sequence'};
00554   }
00555   
00556   if(!defined($self->{'_sequence'}) and
00557      defined($self->sequence_id()) and     
00558      defined($self->adaptor))
00559   {
00560     $self->{'_sequence'} = $self->adaptor->_fetch_sequence_by_id($self->sequence_id);
00561     $self->{'_seq_length'} = length($self->{'_sequence'}) if(defined($self->{'_sequence'}));
00562   }
00563 
00564   return $self->{'_sequence'};
00565 }
00566 
00567 =head2 sequence_exon_cased
00568 
00569   Args       : none
00570   Example    : my $sequence_exon_cased = $member->sequence_exon_cased;
00571 
00572   Description: Get/set the sequence string of this peptide member with
00573                alternating upper and lower case corresponding to the translateable exons.
00574   Returntype : string
00575   Exceptions : none
00576   Caller     : general
00577 
00578 =cut
00579 
00580 sub sequence_exon_cased {
00581   my $self = shift;
00582 
00583   my $sequence = $self->sequence;
00584   my $trans = $self->transcript;
00585   my @exons = @{$trans->get_all_translateable_Exons};
00586   return $sequence if (1 == scalar @exons);
00587 
00588   my %splice_site;
00589   my $pep_len = 0;
00590   my $overlap_len = 0;
00591   while (my $exon = shift @exons) {
00592     my $exon_len = $exon->length;
00593     my $pep_seq = $exon->peptide($trans)->length;
00594     # remove the first char of seq if overlap ($exon->peptide()) return full overlapping exon seq
00595     $pep_seq -= 1 if ($overlap_len);
00596     $pep_len += $pep_seq;
00597     if ($overlap_len = (($exon_len + $overlap_len ) %3)){          # if there is an overlap
00598       $splice_site{$pep_len-1}{'overlap'} = $pep_len -1;         # stores overlapping aa-exon boundary
00599     } else {
00600       $overlap_len = 0;
00601     }
00602     $splice_site{$pep_len}{'phase'} = $overlap_len;                 # positions of exon boundary
00603   }
00604 
00605   my @exon_sequences = ();
00606   foreach my $pep_len (sort {$b<=>$a} keys %splice_site) { # We start from the end
00607     next if (defined($splice_site{$pep_len}{'overlap'}));
00608     next if ($pep_len > length($sequence)); # Get rid of 1 codon STOP exons in the protein
00609     my $length = $pep_len;
00610     $length-- if (defined($splice_site{$pep_len}{'phase'}) && 1 == $splice_site{$pep_len}{'phase'});
00611     my $peptide;
00612     $peptide = substr($sequence,$length,length($sequence),'');
00613     unshift(@exon_sequences, $peptide);
00614   }
00615   unshift(@exon_sequences, $sequence); # First exon (last piece of sequence left)
00616 
00617   my $splice = 1;
00618   foreach my $exon_sequence (@exon_sequences) {
00619     if ($splice % 2 == 0) {
00620       $exon_sequence = lc($exon_sequence);
00621     }
00622     $splice++;
00623   }  
00624   my $seqsplice = join("", @exon_sequences);
00625 
00626   return $seqsplice;
00627 }
00628 
00629 sub sequence_exon_bounded {
00630   my $self = shift;
00631 
00632   if(@_) {
00633     $self->{'_sequence_exon_bounded'} = shift;
00634     return $self->{'_sequence_exon_bounded'};
00635   }
00636 
00637   if(!defined($self->{'_sequence_exon_bounded'})) {
00638     $self->{'_sequence_exon_bounded'} = $self->adaptor->db->get_MemberAdaptor->_fetch_sequence_exon_bounded_by_member_id($self->member_id);
00639   }
00640 
00641   if(!defined($self->{'_sequence_exon_bounded'})) {
00642     $self->{'_sequence_exon_bounded'} = $self->_compose_sequence_exon_bounded;
00643   }
00644 
00645   return $self->{'_sequence_exon_bounded'};
00646 }
00647 
00648 
00649 sub _compose_sequence_exon_bounded {
00650   my $self = shift;
00651 
00652   my $sequence = $self->sequence;
00653   my $trans = $self->transcript;
00654   my @exons = @{$trans->get_all_translateable_Exons};
00655   return $sequence if (1 == scalar @exons);
00656 
00657   my %splice_site;
00658   my $pep_len = 0;
00659   my $overlap_len = 0;
00660   while (my $exon = shift @exons) {
00661     my $exon_len = $exon->length;
00662     my $pep_seq = $exon->peptide($trans)->length;
00663     # remove the first char of seq if overlap ($exon->peptide()) return full overlapping exon seq
00664     $pep_seq -= 1 if ($overlap_len);
00665     $pep_len += $pep_seq;
00666     if ($overlap_len = (($exon_len + $overlap_len ) %3)){          # if there is an overlap
00667       $splice_site{$pep_len-1}{'overlap'} = $pep_len -1;         # stores overlapping aa-exon boundary
00668     } else {
00669       $overlap_len = 0;
00670     }
00671     $splice_site{$pep_len}{'phase'} = $overlap_len;                 # positions of exon boundary
00672   }
00673 
00674   my $seqsplice = '';
00675   foreach my $pep_len (sort {$b<=>$a} keys %splice_site) { # We start from the end
00676     next if (defined($splice_site{$pep_len}{'overlap'}));
00677     next if ($pep_len > length($sequence)); # Get rid of 1 codon STOP exons in the protein
00678     my $length = $pep_len;
00679     $length-- if (defined($splice_site{$pep_len}{'phase'}) && 1 == $splice_site{$pep_len}{'phase'});
00680     my $peptide;
00681     $peptide = substr($sequence,$length,length($sequence),'');
00682     $seqsplice = $peptide . $seqsplice;
00683     $seqsplice = 'o' . $seqsplice if (0 == $splice_site{$pep_len}{'phase'});
00684     $seqsplice = 'b' . $seqsplice if (1 == $splice_site{$pep_len}{'phase'});
00685     $seqsplice = 'j' . $seqsplice if (2 == $splice_site{$pep_len}{'phase'});
00686   }
00687   $seqsplice = $sequence . $seqsplice; # First exon AS IS
00688 
00689   return $seqsplice;
00690 }
00691 
00692 sub sequence_cds {
00693   my $self = shift;
00694 
00695   if(@_) {
00696     $self->{'_sequence_cds'} = shift;
00697     return $self->{'_sequence_cds'};
00698   }
00699 
00700   if(!defined($self->{'_sequence_cds'})) {
00701     $self->{'_sequence_cds'} = $self->adaptor->db->get_MemberAdaptor->_fetch_sequence_cds_by_member_id($self->member_id);
00702   }
00703 
00704   if(!defined($self->{'_sequence_cds'})) {
00705     $self->{'_sequence_cds'} = $self->transcript->translateable_seq;
00706   }
00707 
00708   return $self->{'_sequence_cds'};
00709 }
00710 
00711 # GJ 2008-11-17
00712 # Returns the amino acid sequence with exon boundaries denoted as O, B, or J depending on the phase (O=0, B=1, J=2)
00713 sub get_exon_bounded_sequence {
00714     my $self = shift;
00715     my $numbers = shift;
00716     my $transcript = $self->get_Transcript;
00717 
00718     # The get_all_translateable_exons creates a list of reformatted "translateable" exon sequences.
00719     # When the exon phase is 1 or 2, there will be duplicated residues at the end and start of exons.
00720     # We'll deal with this during the exon loop.
00721     my @exons = @{$transcript->get_all_translateable_Exons};
00722     my $seq_string = "";
00723     # for my $ex (@exons) {
00724     while (my $ex = shift @exons) {
00725     my $seq = $ex->peptide($transcript)->seq;
00726 
00727     # PHASE HANDLING
00728     my $phase = $ex->phase;
00729     my $end_phase = $ex->end_phase;
00730 
00731     # First, cut off repeated end residues.
00732     if ($end_phase == 1 && 0 < scalar @exons) {
00733         # We only own 1/3, so drop the last residue.
00734         $seq = substr($seq,0,-1);
00735     }
00736 
00737     # Now cut off repeated start residues.
00738     if ($phase == 2) {
00739         # We only own 1/3, so drop the first residue.
00740         $seq = substr($seq, 1);
00741     }
00742 
00743     if ($end_phase > -1) {
00744         $seq = $seq . 'o' if ($end_phase == 0);
00745         $seq = $seq . 'b' if ($end_phase == 1);
00746         $seq = $seq . 'j' if ($end_phase == 2);
00747     }
00748     #print "Start_phase: $phase   End_phase: $end_phase\t$seq\n";
00749     $seq_string .= $seq;
00750     }
00751     if (defined $numbers) {
00752       $seq_string =~ s/o/0/g; $seq_string =~ s/b/1/g; $seq_string =~ s/j/2/g;
00753     }
00754     return $seq_string;
00755 }
00756 
00757 =head2 seq_length
00758 
00759   Example    : my $seq_length = $member->seq_length;
00760   Description: get the sequence length of this member
00761   Returntype : int
00762   Exceptions : none
00763   Caller     : general
00764 
00765 =cut
00766 
00767 sub seq_length {
00768   my $self = shift;
00769 
00770   unless(defined($self->{'_seq_length'})) {
00771     #need to check case if user is calling seq_length first
00772     #call $self->sequence (to lazy load if needed)
00773     my $seq = $self->sequence;
00774     $self->{'_seq_length'} = length($seq) if(defined($seq));
00775   }
00776   return $self->{'_seq_length'};
00777 }
00778 
00779 
00780 =head2 sequence_id
00781 
00782   Arg [1]    : int $sequence_id
00783   Example    : my $sequence_id = $member->sequence_id;
00784   Description: Extracts the sequence_id of this member
00785   Returntype : int
00786   Exceptions : none
00787   Caller     : general
00788 
00789 =cut
00790 
00791 sub sequence_id {
00792   my $self = shift;
00793   $self->{'_sequence_id'} = shift if(@_);
00794   if(!defined($self->{'_sequence_id'})) { $self->{'_sequence_id'}=0; }
00795   return $self->{'_sequence_id'};
00796 }
00797 
00798 =head2 gene_member_id
00799 
00800   Arg [1]    : int $gene_member_id
00801   Example    : my $gene_member_id = $member->gene_member_id;
00802   Description: Gene_member_id of this protein member
00803   Returntype : int
00804   Exceptions : none
00805   Caller     : general
00806 
00807 =cut
00808 
00809 sub gene_member_id {
00810   my $self = shift;
00811   $self->{'_gene_member_id'} = shift if(@_);
00812   return $self->{'_gene_member_id'};
00813 }
00814 
00815 
00816 =head2 bioseq
00817 
00818   Args       : none
00819   Example    : my $primaryseq = $member->primaryseq;
00820   Description: returns sequence this member as a Bio::Seq object
00821   Returntype : Bio::Seq object
00822   Exceptions : none
00823   Caller     : general
00824 
00825 =cut
00826 
00827 sub bioseq {
00828   my $self = shift;
00829 
00830   throw("Member stable_id undefined") unless defined($self->stable_id());
00831   throw("No sequence for member " . $self->stable_id()) unless defined($self->sequence());
00832 
00833   my $seqname;
00834   if (defined($self->genome_db_id) and defined($self->dbID)) {
00835     $seqname = "IDs:" . $self->genome_db_id . ":" . $self->dbID . " " .
00836         $self->source_name . ":" . $self->stable_id;
00837   } else {
00838     $seqname = $self->source_name . ":" . $self->stable_id;
00839   }
00840   my $seq = Bio::Seq->new(-seq        => $self->sequence(),
00841                           -primary_id => "member_id_".$self->dbID,
00842                           -display_id => "member_id_".$self->dbID,
00843                           -desc       => $seqname ."|". $self->description(),
00844                          );
00845   return $seq;
00846 }
00847 
00848 =head2 gene_member
00849 
00850   Arg[1]     : Bio::EnsEMBL::Compara::Member $geneMember (optional)
00851   Example    : my $gene_member = $member->gene_member;
00852   Description: returns gene member object for this protein member
00853   Returntype : Bio::EnsEMBL::Compara::Member object
00854   Exceptions : if arg[0] is not a Bio::EnsEMBL::Compara::Member object
00855   Caller     : MemberAdaptor(set), general
00856 
00857 =cut
00858 
00859 sub gene_member {
00860   my $self = shift;
00861   my $gene_member = shift;
00862 
00863   if ($gene_member) {
00864     throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$gene_member]")
00865       unless ($gene_member->isa('Bio::EnsEMBL::Compara::Member'));
00866     $self->{'_gene_member'} = $gene_member;
00867   }
00868   if(!defined($self->{'_gene_member'}) and
00869      defined($self->adaptor) and $self->dbID)
00870   {
00871     $self->{'_gene_member'} = $self->adaptor->db->get_MemberAdaptor->fetch_gene_for_peptide_member_id($self->dbID);
00872   }
00873   return $self->{'_gene_member'};
00874 }
00875 
00876 =head2 print_member
00877 
00878   Arg[1]     : string $postfix
00879   Example    : $member->print_member("BRH");
00880   Description: used for debugging, prints out key descriptive elements
00881                of member
00882   Returntype : none
00883   Exceptions : none
00884   Caller     : general
00885 
00886 =cut
00887 
00888 sub print_member
00889 
00890 {
00891   my $self = shift;
00892   my $postfix = shift;
00893 
00894   printf("   %s %s(%d)\t%s : %d-%d",$self->source_name, $self->stable_id,
00895          $self->dbID,$self->chr_name,$self->chr_start, $self->chr_end);
00896   if($postfix) { print(" $postfix"); }
00897   else { print("\n"); }
00898 }
00899 
00900 
00901 =head2 get_Gene
00902 
00903   Args       : none
00904   Example    : $gene = $member->get_Gene
00905   Description: if member is an 'ENSEMBLGENE' returns Bio::EnsEMBL::Gene object
00906                by connecting to ensembl genome core database
00907                REQUIRES properly setup Registry conf file or
00908                manually setting genome_db->db_adaptor for each genome.
00909   Returntype : Bio::EnsEMBL::Gene or undef
00910   Exceptions : none
00911   Caller     : general
00912 
00913 =cut
00914 
00915 sub get_Gene {
00916   my $self = shift;
00917   
00918   return $self->{'core_gene'} if($self->{'core_gene'});
00919   
00920   unless($self->genome_db and 
00921          $self->genome_db->db_adaptor and
00922          $self->genome_db->db_adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) 
00923   {
00924     throw("unable to connect to core ensembl database: missing registry and genome_db.locator");
00925   }
00926 
00927   my $coreDBA = $self->genome_db->db_adaptor;
00928   if($self->source_name eq 'ENSEMBLGENE') {    
00929     $self->{'core_gene'} = $coreDBA->get_GeneAdaptor->fetch_by_stable_id($self->stable_id);
00930   }
00931   if($self->source_name eq 'ENSEMBLPEP') {
00932     $self->{'core_gene'} = $coreDBA->get_GeneAdaptor->fetch_by_stable_id($self->gene_member->stable_id);
00933   }
00934   return $self->{'core_gene'};
00935 }
00936 
00937 =head2 get_Transcript
00938 
00939   Args       : none
00940   Example    : $transcript = $member->get_Transcript
00941   Description: if member is an 'ENSEMBLPEP' returns Bio::EnsEMBL::Transcript object
00942                by connecting to ensembl genome core database
00943                REQUIRES properly setup Registry conf file or
00944                manually setting genome_db->db_adaptor for each genome.
00945   Returntype : Bio::EnsEMBL::Transcript or undef
00946   Exceptions : none
00947   Caller     : general
00948 
00949 =cut
00950 
00951 sub get_Transcript {
00952   my $self = shift;
00953   
00954   return undef unless($self->source_name eq 'ENSEMBLPEP');
00955   return $self->{'core_transcript'} if($self->{'core_transcript'});
00956 
00957   unless($self->genome_db and 
00958          $self->genome_db->db_adaptor and
00959          $self->genome_db->db_adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) 
00960   {
00961     throw("unable to connect to core ensembl database: missing registry and genome_db.locator");
00962   }
00963   my $coreDBA = $self->genome_db->db_adaptor;
00964   $self->{'core_transcript'} = $coreDBA->get_TranscriptAdaptor->fetch_by_translation_stable_id($self->stable_id);
00965   return $self->{'core_transcript'};
00966 }
00967 
00968 
00969 =head2 get_Translation
00970 
00971   Args       : none
00972   Example    : $translation = $member->get_Translation
00973   Description: if member is an 'ENSEMBLPEP' returns Bio::EnsEMBL::Translation object
00974                by connecting to ensembl genome core database
00975                REQUIRES properly setup Registry conf file or
00976                manually setting genome_db->db_adaptor for each genome.
00977   Returntype : Bio::EnsEMBL::Gene or undef
00978   Exceptions : none
00979   Caller     : general
00980 
00981 =cut
00982 
00983 sub get_Translation {
00984   my $self = shift;
00985 
00986   if($self->get_Transcript) {
00987     my $transcript = $self->get_Transcript;
00988     return $transcript->translation();
00989   }
00990   return undef;
00991 }
00992 
00993 sub gene {
00994   my $self = shift;
00995   return $self->get_Gene;
00996 }
00997 sub transcript {
00998   my $self = shift;
00999   return $self->get_Transcript;
01000 }
01001 sub translation {
01002   my $self = shift;
01003   return $self->get_Translation();
01004 }
01005 
01006 
01007 =head2 get_canonical_peptide_Member
01008 
01009   Args       : none
01010   Example    : $canonicalPepMember = $member->get_canonical_peptide_Member
01011   Description: if member is an "ENSEMBLGENE" it will return the canonical peptide member
01012                if member is an 'ENSEMBLPEP' it will get its gene member and have it
01013                return the canonical peptide (which could be the same as the starting member)
01014   Returntype : Bio::EnsEMBL::Compara::Member or undef
01015   Exceptions : none
01016   Caller     : general
01017 
01018 =cut
01019 
01020 sub get_canonical_peptide_Member {
01021     my $self = shift;
01022 
01023     return unless($self->adaptor);
01024 
01025     my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_canonical_peptide_member_for_gene_member_id')
01026         ? $self->adaptor    # a MemberAdaptor or derivative
01027         : $self->adaptor->db->get_MemberAdaptor;
01028 
01029     if($self->source_name eq 'ENSEMBLGENE') {
01030 
01031         return $able_adaptor->fetch_canonical_peptide_member_for_gene_member_id($self->dbID);
01032 
01033     } elsif($self->source_name eq 'ENSEMBLPEP') {
01034 
01035         my $geneMember = $self->gene_member or return;
01036 
01037         return $able_adaptor->fetch_canonical_peptide_member_for_gene_member_id($geneMember->dbID);
01038     } else {
01039 
01040         return undef;
01041     }
01042 }
01043 
01044 
01045 =head2 get_canonical_transcript_Member
01046 
01047   Args       : none
01048   Example    : $canonical_trans_member = $member->get_canonical_transcript_Member
01049   Description: if member is an "ENSEMBLGENE" it will return the canonical transcript member
01050                if member is an 'ENSEMBLTRANS' it will get its gene member and have it
01051                return the canonical transcript (which could be the same as the starting member).
01052                Note: This method is intended for ncRNA genes only. To access the canonical
01053                transcript for a protein-coding gene, please refer to the
01054                get_canonical_peptide_Member method
01055   Returntype : Bio::EnsEMBL::Compara::Member or undef
01056   Exceptions : none
01057   Caller     : general
01058 
01059 =cut
01060 
01061 sub get_canonical_transcript_Member {
01062     my $self = shift;
01063 
01064     return unless($self->adaptor);
01065 
01066     my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_canonical_transcript_member_for_gene_member_id')
01067         ? $self->adaptor    # a MemberAdaptor or derivative
01068         : $self->adaptor->db->get_MemberAdaptor;
01069 
01070     if($self->source_name eq 'ENSEMBLGENE') {
01071 
01072         return $able_adaptor->fetch_canonical_transcript_member_for_gene_member_id($self->dbID);
01073 
01074     } elsif($self->source_name eq 'ENSEMBLTRANS') {
01075 
01076         my $geneMember = $self->gene_member or return;
01077 
01078         return $able_adaptor->fetch_canonical_transcript_member_for_gene_member_id($geneMember->dbID);
01079     } else {
01080 
01081         return undef;
01082     }
01083 }
01084 
01085 
01086 =head2 get_all_peptide_Members
01087 
01088   Args       : none
01089   Example    : $pepMembers = $gene_member->get_all_peptide_Members
01090   Description: return listref of all peptide members of this gene_member
01091   Returntype : array ref of Bio::EnsEMBL::Compara::Member 
01092   Exceptions : throw if not an ENSEMBLGENE
01093   Caller     : general
01094 
01095 =cut
01096 
01097 sub get_all_peptide_Members {
01098     my $self = shift;
01099 
01100     throw("adaptor undefined, can access database") unless($self->adaptor);
01101     throw("not an ENSEMBLGENE member") if($self->source_name ne 'ENSEMBLGENE'); 
01102 
01103     my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_all_peptides_for_gene_member_id')
01104         ? $self->adaptor    # a MemberAdaptor or derivative
01105         : $self->adaptor->db->get_MemberAdaptor;
01106 
01107 
01108     return $able_adaptor->fetch_all_peptides_for_gene_member_id($self->dbID);
01109 }
01110  
01111 
01112 1;