Archive Ensembl Home
Attribute.pm
Go to the documentation of this file.
00001 package Bio::EnsEMBL::Compara::Attribute;
00002 
00003 use strict;
00004 use Carp;
00005 use Bio::EnsEMBL::Utils::Exception;
00006 
# $AUTOLOAD is set by perl to the fully qualified name of the method that
# triggered AUTOLOAD.
our $AUTOLOAD;

# Whitelist of field names that AUTOLOAD accepts as get/set accessors.
# Any other method name reaching AUTOLOAD is rejected.
our %ok_field = map { $_ => 1 } qw(
  member_id
  family_id
  cigar_line
  cigar_start
  cigar_end
  domain_id
  member_start
  member_end
  homology_id
  peptide_member_id
  perc_cov
  perc_id
  perc_pos
  peptide_align_feature_id
);
00024 
00025 
=head2 new

  Arg [1]    : none
  Example    : my $attribute = Bio::EnsEMBL::Compara::Attribute->new();
  Description: Creates an attribute object backed by an empty hash. No
               fields are set by the constructor.
  Returntype : object blessed into the invoking class
  Exceptions : none
  Caller     : general

=cut

sub new {
  my $class = shift;

  my $self = {};
  bless $self, $class;

  return $self;
}
00031 
=head2 new_fast

  Arg [1]    : hash reference $hashref
  Example    : my $attribute = Bio::EnsEMBL::Compara::Attribute->new_fast(
                 { 'member_id' => 12, 'cigar_line' => '3M2D' });
  Description: Ultra fast constructor: the hash reference that is passed in
               is blessed directly (it is not copied), so the caller must
               know the internal field names of the object.
  Returntype : the very same $hashref, blessed into the invoking class
  Exceptions : none
  Caller     : general

=cut

sub new_fast {
  my $class   = shift;
  my $hashref = shift;

  # Blessing happens in place; the caller's reference becomes the object.
  bless $hashref, $class;

  return $hashref;
}
00049 
=head2 AUTOLOAD

  Arg [1]    : (optional) new value for the field
  Description: Generic get/set accessor. The method name that was called is
               taken from $AUTOLOAD (package qualifier stripped) and must be
               a key of %ok_field; the value is stored in the object hash
               under the lower-cased method name.
  Returntype : the current value of the field (after any assignment)
  Exceptions : croaks on a method name that is not in %ok_field
  Caller     : general

=cut

sub AUTOLOAD {
  my ($self, @args) = @_;

  # $AUTOLOAD holds the fully qualified name; keep only the bare method name.
  (my $method = $AUTOLOAD) =~ s/.*:://;

  croak "invalid method: ->$method()" unless $ok_field{$method};

  my $key = lc $method;
  if (@args) {
    $self->{$key} = $args[0];
  }

  return $self->{$key};
}
00058 
=head2 alignment_string

  Arg [1]    : $member - object whose sequence() method returns the
               unaligned sequence
  Example    : my $aligned = $attribute->alignment_string($member);
  Description: Expands cigar_line against the member sequence: every "nM"
               segment copies the next n residues, every "nD" segment
               inserts n '-' characters (a missing count means 1). When
               cigar_start and cigar_end are both set (defined and non-zero)
               only that 1-based, inclusive stretch of the sequence is used.
               The result is cached on the object.
  Returntype : string
  Exceptions : throws if cigar_line is undefined, if $member is undefined,
               or if only one of cigar_start / cigar_end is set
  Caller     : general

=cut

sub alignment_string {
  my ($self, $member) = @_;

  throw("To get an alignment_string, the cigar_line needs to be define\n")
    unless defined $self->cigar_line;
  throw("To get an alignment_string, the peptide member needs to be passed as an option\n")
    unless defined $member;

  # Cached from an earlier call: nothing left to do.
  return $self->{'alignment_string'} if defined $self->{'alignment_string'};

  my $sequence = $member->sequence;

  # A bound counts as "set" only when it is defined and non-zero.
  my $first     = $self->cigar_start;
  my $last      = $self->cigar_end;
  my $first_set = (defined($first) && ($first != 0));
  my $last_set  = (defined($last)  && ($last != 0));

  if ($first_set || $last_set) {
    throw("both cigar_start and cigar_end should be defined")
      unless ($first_set && $last_set);

    # Bounds are 1-based and inclusive; substr wants a 0-based offset.
    $sequence = substr($sequence, $first - 1, $last - $first + 1);
  }

  # Put a space after every operation letter so the line splits into
  # one token per segment.
  my $cigar_line = $self->cigar_line;
  $cigar_line =~ s/([MD])/$1 /g;

  my $aligned  = "";
  my $consumed = 0;    # number of residues already copied from $sequence
  for my $token (split " ", $cigar_line) {
    if ($token =~ /^(\d*)D$/) {
      my $run = ($1 eq "") ? 1 : $1;
      $aligned .= "-" x $run;
      next;
    }

    # Tokens that are neither a D nor an M segment are skipped.
    next unless $token =~ /^(\d*)M$/;

    my $run = ($1 eq "") ? 1 : $1;
    $aligned .= substr($sequence, $consumed, $run);
    $consumed += $run;
  }

  $self->{'alignment_string'} = $aligned;

  return $self->{'alignment_string'};
}
00109 
=head2 cdna_alignment_string

  Arg [1]    : $member - peptide member object; must provide source_name()
               and sequence_cds(), and is handed on to alignment_string()
  Arg [2]    : (optional) boolean $changeSelenos - when true, residues 'U'
               in the peptide alignment are written as 'NNN' instead of
               their codon. Defaults to 0.
  Example    : my $cdna_alignment = $attribute->cdna_alignment_string($member);
  Description: Converts the peptide alignment string to a cdna alignment
               string. Every residue becomes its codon followed by a space,
               every gap becomes '--- ' and every '*' becomes 'NNN '.
               This only works for members whose source_name is
               'ENSEMBLPEP'; for any other member undef is returned and a
               warning is issued.
               When cigar_start and cigar_end are both set (defined and
               non-zero, as in alignment_string) only the matching stretch
               of the cdna is used.
               The result is cached on the object together with the
               $changeSelenos value it was built with.
  Returntype : string, or undef (see above)
  Exceptions : throws if $member is undefined, or if only one of
               cigar_start / cigar_end is set; alignment_string() may throw
  Caller     : general

=cut

sub cdna_alignment_string {
  my ($self, $member, $changeSelenos) = @_;
  # Normalise the flag so it can be compared against the cached one.
  $changeSelenos = $changeSelenos ? 1 : 0;

  # Same check as alignment_string, instead of failing on a method call
  # on an undefined value.
  unless (defined $member) {
    throw("To get a cdna_alignment_string, the peptide member needs to be passed as an option\n");
  }

  if($member->source_name ne 'ENSEMBLPEP') {
    warning("Don't know how to retrieve cdna for database [@{[$member->source_name]}]
      SPECIES @{[ $member->adaptor->db->get_GenomeDBAdaptor->fetch_by_dbID($member->genome_db_id)->db_adaptor->dbname ]}" );
    return undef;
  }

  # The output depends on $changeSelenos, so a cached string is only reused
  # when it was built with the same flag. A cached string without a recorded
  # flag (e.g. supplied through new_fast) is trusted as before.
  my $cached_flag = $self->{'_cdna_alignment_string_selenos'};
  my $cache_valid = defined $self->{'cdna_alignment_string'}
                    && (!defined $cached_flag || $cached_flag == $changeSelenos);

  unless ($cache_valid) {
    my $cdna = $member->sequence_cds();

    # Treat 0 like "not set", exactly as alignment_string does. With a
    # defined-only test, start = end = 0 gave an offset of -3 and substr
    # silently kept only the last codon.
    my $cigar_start = $self->cigar_start;
    my $cigar_end   = $self->cigar_end;
    my $start_set   = (defined($cigar_start) && ($cigar_start != 0));
    my $end_set     = (defined($cigar_end)   && ($cigar_end != 0));

    if ($start_set || $end_set) {
      unless ($start_set && $end_set) {
        throw("both cigar_start and cigar_end should be defined");
      }
      # Peptide coordinates are 1-based; one residue is three nucleotides.
      my $offset = ($cigar_start - 1) * 3;
      my $length = ($cigar_end - $cigar_start + 1) * 3;
      $cdna = substr($cdna, $offset, $length);
    }

    my $start = 0;
    my $cdna_align_string = '';

    foreach my $pep (split(//,$self->alignment_string($member))) {
      if ($pep eq '-') {
        # A gap does not consume any cdna.
        $cdna_align_string .= '--- ';
      } elsif (($pep eq 'U' && $changeSelenos) || $pep eq '*') {
        # Masked residue: emit Ns but still skip over its codon.
        $cdna_align_string .= 'NNN ';
        $start += 3;
      } else {
        my $codon = substr($cdna, $start, 3);
        unless (length($codon) == 3) {
          # sometimes the last codon contains only 1 or 2 nucleotides.
          # making sure that it has 3 by adding as many Ns as necessary
          $codon .= 'N' x (3 - length($codon));
        }
        $cdna_align_string .= $codon . ' ';
        $start += 3;
      }
    }

    $self->{'cdna_alignment_string'}          = $cdna_align_string;
    $self->{'_cdna_alignment_string_selenos'} = $changeSelenos;
  }

  return $self->{'cdna_alignment_string'};
}
00175 
# Explicit no-op destructor: without it, object destruction would be routed
# through AUTOLOAD, which croaks on any name that is not in %ok_field.
sub DESTROY {
  return;
}
00177 
00178 1;