Archive Ensembl HomeArchive Ensembl Home
PairAlign.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 # POD documentation - main docs before the code
00022 
00023 =head1 NAME
00024 
00025 PairAlign - Dna pairwise alignment module
00026 
00027 =head1 SYNOPSIS
00028 
00029 Give standard usage here
00030 
00031 =head1 DESCRIPTION
00032 
00033 Contains list of sub alignments making up a dna-dna alignment
00034 
00035 Creation:
00036 
00037   my $pair = new Bio::EnsEMBL::FeaturePair(
00038     -start  => $qstart,
00039     -end    => $qend,
00040     -strand => $qstrand,
00041     -hstart => $hstart,
00042     -hend   => $hend,
00043     -hend   => $hstrand,
00044   );
00045 
00046   my $pairaln = new Bio::EnsEMBL::Analysis::PairAlign;
00047   $pairaln->addFeaturePair($pair);
00048 
00049 Any number of pair alignments can be added to the PairAlign object
00050 
00051 
00052 Manipulation:
00053 
00054 To convert between coordinates:
00055 
00056   my $cdna_coord = $pair->genomic2cDNA($gen_coord);
00057   my $gen_coord  = $pair->cDNA2genomic($cdna_coord);
00058 
00059 =head1 METHODS
00060 
00061 =cut
00062 
00063 package Bio::EnsEMBL::Analysis::PairAlign;
00064 
00065 use vars qw(@ISA);
00066 use strict;
00067 
00068 
00069 @ISA = qw(Bio::EnsEMBL::Root);
00070 
00071 sub new {
00072     my($class,@args) = @_;
00073     my $self = {};
00074     bless $self, $class;
00075 
00076     $self->{'_homol'} = [];
00077     
00078     return $self; # success - we hope!
00079 }
00080 
00081 sub addFeaturePair {
00082     my ($self,$pair) = @_;
00083 
00084     $self->throw("Not a Bio::EnsEMBL::FeaturePair object") unless ($pair->isa("Bio::EnsEMBL::FeaturePair"));
00085 
00086     push(@{$self->{'_pairs'}},$pair);
00087     
00088 }
00089 
00090 
00091 =head2 eachFeaturePair
00092 
00093  Title   : eachFeaturePait
00094  Usage   : my @pairs = $pair->eachFeaturePair
00095  Function: 
00096  Example : 
00097  Returns : Array of Bio::SeqFeature::FeaturePair
00098  Args    : none
00099 
00100 
00101 =cut
00102 
00103 sub eachFeaturePair {
00104     my ($self) = @_;
00105 
00106     if (defined($self->{'_pairs'})) {
00107     return @{$self->{'_pairs'}};
00108     }
00109 }
00110 
00111 sub get_hstrand {
00112     my ($self) = @_;
00113 
00114     my @features = $self->eachFeaturePair;
00115 
00116     return $features[0]->hstrand;
00117 }
00118 
00119 =head2 genomic2cDNA
00120 
00121  Title   : genomic2cDNA
00122  Usage   : my $cdna_coord = $pair->genomic2cDNA($gen_coord)
00123  Function: Converts a genomic coordinate to a cdna coordinate
00124  Example : 
00125  Returns : int
00126  Args    : int
00127 
00128 
00129 =cut
00130 
00131 sub genomic2cDNA {
00132   my ($self,$coord) = @_;
00133   my @pairs = $self->eachFeaturePair;
00134   
00135   @pairs = sort {$a->start <=> $b->start} @pairs;
00136   
00137   my $newcoord;
00138   
00139   HOMOL: while (my $sf1 = shift(@pairs)) {
00140     next HOMOL unless ($coord >= $sf1->start && $coord <= $sf1->end);
00141     
00142     if ($sf1->strand == 1 && $sf1->hstrand == 1) {
00143       $newcoord = $sf1->hstart + ($coord - $sf1->start);
00144       last HOMOL;
00145     } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) {
00146       $newcoord = $sf1->hend   - ($coord - $sf1->start);
00147       last HOMOL;
00148     } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) {
00149       $newcoord = $sf1->hstart + ($sf1->end - $coord);
00150       last HOMOL;
00151     } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) {
00152       $newcoord = $sf1->hend   - ($sf1->end - $coord);
00153       last HOMOL;
00154     } else {
00155       $self->throw("ERROR: Wrong strand value in FeaturePair (" . $sf1->strand . "/" . $sf1->hstrand . "\n");
00156     }
00157   }    
00158   
00159   if (defined($newcoord)) {
00160     
00161     return $newcoord;
00162   } else {
00163     $self->throw("Couldn't convert $coord");
00164   }
00165 }
00166 
00167 =head2 cDNA2genomic
00168 
00169  Title   : cDNA2genomic
00170  Usage   : my $gen_coord = $pair->genomic2cDNA($cdna_coord)
00171  Function: Converts a cdna coordinate to a genomic coordinate
00172  Example : 
00173  Returns : int
00174  Args    : int
00175 
00176 
00177 =cut
00178 
00179 sub cDNA2genomic {
00180   my ($self,$coord) = @_;
00181   
00182   my @pairs = $self->eachFeaturePair;
00183   
00184   my $newcoord;
00185   
00186   HOMOL: while (my $sf1 = shift(@pairs)) {
00187     next HOMOL unless ($coord >= $sf1->hstart && $coord <= $sf1->hend);
00188     
00189     if ($sf1->strand == 1 && $sf1->hstrand == 1) {
00190       $newcoord = $sf1->start + ($coord - $sf1->hstart);
00191       last HOMOL;
00192     } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) {
00193       $newcoord = $sf1->start  +($sf1->hend - $coord);
00194       last HOMOL;
00195     } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) {
00196       $newcoord = $sf1->end   - ($coord - $sf1->hstart);
00197       last HOMOL;
00198     } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) {
00199       $newcoord = $sf1->end   - ($sf1->hend - $coord);
00200       last HOMOL; 
00201     } else {
00202       $self->throw("ERROR: Wrong strand value in homol (" . $sf1->strand . "/" . $sf1->hstrand . "\n");
00203     }
00204   }
00205   
00206   if (defined ($newcoord)) {
00207     return $newcoord;
00208   } else {
00209     $self->throw("Couldn't convert $coord\n");
00210   }
00211 }
00212 
00213 sub find_Pair {
00214   my ($self,$coord) = @_;
00215   
00216   foreach my $p ($self->eachFeaturePair) {
00217     if ($coord >= $p->hstart && $coord <= $p->hend) {
00218       return $p;
00219     }
00220   }
00221 }
00222 
00223 =head2 convert_cDNA_feature
00224 
00225  Title   : convert_cDNA_feature
00226  Usage   : my @newfeatures = $self->convert_cDNA_feature($f);
00227  Function: Converts a feature on the cDNA into an array of 
00228            features on the genomic (for features that span across introns);
00229  Example : 
00230  Returns : @Bio::EnsEMBL::FeaturePair
00231  Args    : Bio::EnsEMBL::FeaturePair
00232 
00233 =cut
00234 
00235 sub convert_cDNA_feature {
00236   my ($self,$feature) = @_;
00237     
00238   my $foundstart = 0;
00239   my $foundend   = 0;
00240   
00241   my @pairs = $self->eachFeaturePair;
00242   my @newfeatures;
00243   
00244   HOMOL: while (my $sf1 = shift(@pairs)) {
00245     my $skip = 0;
00246     #print STDERR "Looking at cDNA exon " . $sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n";
00247     
00248     $skip = 1 unless ($feature->start >= $sf1->hstart 
00249                       && $feature->start <= $sf1->hend);
00250     
00251     if($skip){
00252       #print STDERR "Skipping ".$sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n";
00253       next HOMOL;
00254     }
00255     if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) {
00256       $foundend = 1;
00257     }
00258     
00259     my $startcoord = $self->cDNA2genomic($feature->start);
00260     my $endcoord;
00261     
00262     if ($sf1->hstrand == 1) {
00263       $endcoord   = $sf1->end;
00264     } else {
00265       $endcoord   = $sf1->start;
00266     }
00267     
00268     if ($foundend) {
00269       $endcoord = $self->cDNA2genomic($feature->end);
00270     }
00271       
00272     #print STDERR "Making new genomic feature $startcoord\t$endcoord\n";
00273     
00274     my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname,
00275                                             -start   => $startcoord,
00276                                             -end     => $endcoord,
00277                                             -strand  => $feature->strand);
00278     push(@newfeatures,$tmpf);
00279     last;
00280   }
00281     
00282   # Now the rest of the pairs until we find the endcoord
00283   
00284   while ((my $sf1 = shift(@pairs)) && ($foundend == 0)) {
00285     
00286     if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) {
00287       $foundend = 1;
00288     }
00289     
00290     my $startcoord;
00291     my $endcoord;
00292     
00293     if ($sf1->hstrand == 1) {
00294       $startcoord = $sf1->start;
00295       $endcoord   = $sf1->end;
00296     } else {
00297       $startcoord = $sf1->end;
00298       $endcoord   = $sf1->start;
00299     }
00300     
00301     if ($foundend) {
00302       $endcoord = $self->cDNA2genomic($feature->end);
00303     }
00304     
00305     #   #print STDERR "Making new genomic feature $startcoord\t$endcoord\n";
00306     
00307     my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname,
00308                                          -start   => $startcoord,
00309                                          -end     => $endcoord,
00310                                          -strand  => $feature->strand);
00311     push(@newfeatures,$tmpf);
00312   }
00313   #print STDERR "Have ".@newfeatures." features from ".$feature."\n";
00314   return @newfeatures;
00315 }
00316 
00317 
00318 sub convert_FeaturePair {
00319   my ($self,$pair) = @_;
00320 
00321   my $hstrand = $self->get_hstrand;
00322   
00323   my $feat = $self->create_Feature($pair->start, $pair->end, $pair->strand,
00324                                    $pair->slice);
00325   my @newfeatures = $self->convert_cDNA_feature($feat);
00326   my @newpairs;
00327   
00328   my $hitpairaln  = new Bio::EnsEMBL::Analysis::PairAlign;
00329   $hitpairaln->addFeaturePair($pair);
00330   
00331   foreach my $new (@newfeatures) {
00332     
00333     # Now we want to convert these cDNA coords into hit coords
00334     
00335     my $hstart1 = $self->genomic2cDNA($new->start);
00336     my $hend1   = $self->genomic2cDNA($new->end);
00337     
00338     my $hstart2 = $hitpairaln->genomic2cDNA($hstart1);
00339     my $hend2   = $hitpairaln->genomic2cDNA($hend1);
00340     
00341     # We can now put the final feature together
00342     
00343     my $finalstrand = $hstrand * $pair->strand * $pair->hstrand;
00344     
00345     if ($hstart2 > $hend2) {
00346       my $tmp = $hstart2;
00347       $hstart2 = $hend2;
00348       $hend2   = $tmp;
00349     }
00350     
00351     my $finalpair = $self->create_FeaturePair($new->start, $new->end, 
00352                                               $new->strand,
00353                                               $hstart2, $hend2, 
00354                                               $finalstrand, $pair->score);
00355     
00356     push(@newpairs,$finalpair);
00357     
00358   }
00359   
00360   return @newpairs;
00361 }
00362 
00363 sub create_FeaturePair {
00364     my ($self, $start, $end, $strand, $hstart, $hend, 
00365         $hstrand, $score) = @_;
00366    
00367     my $fp = Bio::EnsEMBL::FeaturePair->new(
00368                                             -start    => $start,
00369                                             -end      => $end,
00370                                             -strand   => $strand,
00371                                             -hstart   => $hstart,
00372                                             -hend     => $hend,
00373                                             -hstrand  => $hstrand,
00374                                             -score    => $score,
00375                                            );
00376 
00377    
00378     return $fp;
00379 }
00380 
00381 sub create_Feature{
00382   my ($self, $start, $end, $strand,  $slice) = @_;
00383 
00384   my $feat = new Bio::EnsEMBL::Feature(-start   => $start,
00385                                        -end     => $end,
00386                                        -strand  => $strand,
00387                                        -slice   => $slice,
00388                                       );
00389   return $feat;
00390 }
00391 
00392 1;
00393 
00394 
00395 
00396 
00397 
00398 
00399 
00400