Archive Ensembl HomeArchive Ensembl Home
ConservationScoreAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =head1 NAME
00020 
00021 Bio::EnsEMBL::Compara::DBSQL::ConservationScoreAdaptor - Object adaptor to access data in the conservation_score table
00022 
00023 =head1 SYNOPSIS
00024 
00025   Connecting to the database using the Registry
00026 
00027      use Bio::EnsEMBL::Registry;
00028  
00029      my $reg = "Bio::EnsEMBL::Registry";
00030 
00031       $reg->load_registry_from_db(-host=>"ensembldb.ensembl.org", -user=>"anonymous");
00032 
00033       my $conservation_score_adaptor = $reg->get_adaptor(
00034          "Multi", "compara", "ConservationScore");
00035 
00036   Store data in the database
00037 
00038      $conservation_score_adaptor->store($conservation_score);
00039 
00040   To retrieve score data from the database using the default display_size
00041      $conservation_scores = $conservation_score_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice($method_link_species_set, $slice);
00042 
00043   To retrieve one score per base in the slice
00044      $conservation_scores = $conservation_score_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice($method_link_species_set, $slice, $slice->end-$slice->start+1);
00045   Print the scores
00046    foreach my $score (@$conservation_scores) {
00047       printf("position %d observed %.4f expected %.4f difference %.4f\n",  $score->position, $score->observed_score, $score->expected_score, $score->diff_score);
00048    }
00049 
00050   A simple example script for extracting scores from a slice can be found in ensembl-compara/scripts/examples/getConservationScores.pl
00051 
00052 =head1 DESCRIPTION
00053 
00054 This module is used to access data in the conservation_score table. The scores are stored in the database as LITTLE ENDIAN.
00055 Each score is represented by a Bio::EnsEMBL::Compara::ConservationScore. The position and an observed, expected score and a difference score (expected-observed) is stored for each column in a multiple alignment. Not all columns in an alignment have a score (for example, if there is insufficient coverage) and termed here as 'uncalled'. 
00056 In order to speed up processing of the scores over large regions, the scores are stored in the database averaged over window_sizes of 1 (no averaging), 10, 100 and 500. When retrieving the scores, the most appropriate window_size is estimated from the length of the alignment or slice and the number of scores requested, given by the display_size. There is no need to specify the window_size directly. If the number of scores requested (display_size) is smaller than the alignment length or slice length, the scores will be either averaged if display_type = "AVERAGE" or the maximum value taken if display_type = "MAX". Scores in uncalled regions are not returned. To return a score for each column in an alignment, the display_size should be set to be the same size as the alignment length or slice length. 
00057 
00058 =head1 APPENDIX
00059 
00060 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
00061 
00062 =cut
00063 
00064 
00065 # Let the code begin...
00066 
00067 
00068 package Bio::EnsEMBL::Compara::DBSQL::ConservationScoreAdaptor;
00069 use vars qw(@ISA);
00070 use strict;
00071 
00072 use POSIX qw(floor);
00073 
00074 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
00075 use Bio::EnsEMBL::Compara::ConservationScore;
00076 use Bio::EnsEMBL::Utils::Exception qw(throw warning info deprecate);
00077 use Config;
00078 
00079 #global variables
00080 
00081 #store as 4 byte float. If change here, must also change in 
00082 #ConservationScore.pm
00083 my $_pack_size = 4;
00084 #my $_pack_type = "f<"; # not possible until perl 5.10. Avoid for now
00085 my $_pack_type = "f";
00086 
00087 my $_bucket; 
00088 my $_score_index = 0;
00089 #my $_no_score_value = 0.0; #value if no score
00090 my $_no_score_value = undef; #value if no score
00091 
00092 my $PACKED = 1;
00093 
00094 #Check endian-ness. Always write data to database in LITTLE ENDIAN 
00095 #Byteorders "1234" and "12345678" are little-endian; "4321" and "87654321" are big-endian.
00096 my $is_little_endian = 1;
00097 if ($Config{byteorder} eq "87654321" or $Config{byteorder} eq "4321") {
00098     $is_little_endian = 0;
00099 }
00100 
00101 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
00102 
00103 
00104 
00105 =head2 delete_by_genomic_align_block_id
00106 
00107   Arg  1     : int $genomic_align_block_id
00108   Example    : $conservation_score_adaptor->delete_by_genomic_align_block_id(123);
00109   Description: Delete all the scores related to this GenomicAlignBlock object
00110   Returntype : int (number of deleted rows, not scores)
00111   Exceptions : throw if not $genomic_align_block_id
00112   Status     : Stable
00113 
00114 =cut
00115 
00116 sub delete_by_genomic_align_block_id {
00117   my ($self, $genomic_align_block_id) = @_;
00118 
00119   unless (defined $genomic_align_block_id) {
00120     throw("Must define a genomic_align_block_id");
00121   }
00122 
00123   my $sql = "DELETE FROM conservation_score WHERE genomic_align_block_id = ?";
00124   my $sth = $self->prepare($sql);
00125   my $rv = $sth->execute($genomic_align_block_id);
00126   $sth->finish;
00127 
00128   return $rv;
00129 }
00130 
00131 =head2 fetch_all_by_MethodLinkSpeciesSet_Slice
00132 
00133   Arg  1     : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set 
00134   Arg  2     : Bio::EnsEMBL::Slice $slice
00135   Arg  3     : (opt) integer $display_size (default 700)
00136   Arg  4     : (opt) string $display_type (one of "AVERAGE" or "MAX") (default "AVERAGE")
00137   Arg  5     : (opt) integer $window_size
00138   Exceptions : warning if window_size is not valid
00139   Example    : my $conservation_scores =
00140                     $conservation_score_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice($method_link_species_set, $slice, $slice->end-$slice->start+1);
00141   Description: Retrieve the corresponding 
00142                Bio::EnsEMBL::Compara::ConservationScore objects. 
00143                Each conservation score object contains a position in slice 
00144                coordinates, the observed_score, the expected_score and the 
00145                diff_score (or conservation score) calculated as the 
00146                (expected_score - observed_score).
00147                The method_link_species_set is obtained
00148                using the method_link type of "GERP_CONSERVATION_SCORE". 
00149                For example, this could be obtained for the 10 way PECAN 
00150                alignment, using:
00151                my $mlss = $mlss_adaptor->fetch_by_method_link_type_registry_aliases("GERP_CONSERVATION_SCORE", ["human", "chimp", "rhesus", "cow", "dog", "mouse", "rat", "opossum", "platypus", "chicken"]);
00152 
00153                Display_size defines the number of scores that will be returned.
00154                To return a score for each column in an alignment the display_size 
00155                should be set to be the same size as the slice length eg ($slice->end-$slice->start+1).
00156                If the slice length is larger than the display_size, the scores 
00157                will either be averaged if the display_type is "AVERAGE" or the 
00158                maximum taken if display_type is "MAXIMUM". 
00159                Window_size defines which set of pre-averaged scores to use. 
00160                Valid values are 1, 10, 100 or 500 although there is no need to 
00161                define the window_size because the program will select the most 
00162                appropriate window_size to use based on the slice length and the
00163                display_size for example, a slice length of 1000000 and 
00164                display_size of 1000 will automatically use a window_size of 500.
00165                Slice positions which have no scores are not returned.
00166                The min and max y axis values for the array of 
00167                conservation score objects are set in the first conservation 
00168                score object (index 0). 
00169 
00170   Returntype : ref. to an array of Bio::EnsEMBL::Compara::ConservationScore objects. 
00171   Caller     : object::methodname
00172   Status     : At risk
00173 
00174 =cut
00175 
00176 sub fetch_all_by_MethodLinkSpeciesSet_Slice {
00177     my ($self, $method_link_species_set, $slice, $display_size, $display_type, $window_size) = @_;
00178 
00179     my $scores = [];
00180 
00181     #need to convert conservation score mlss to the corresponding multiple 
00182     #alignment mlss
00183     my $key = "gerp_" . $method_link_species_set->dbID;
00184 
00185     my $ma_mlss_id = $self->db->get_MetaContainer->list_value_by_key($key);
00186     my $ma_mlss;
00187     if (@$ma_mlss_id) {
00188     $ma_mlss = $self->db->get_MethodLinkSpeciesSet->fetch_by_dbID($ma_mlss_id->[0]);
00189     } else {
00190     return $scores;
00191     }
00192 
00193     my $light_genomic_aligns = $self->_get_all_ref_genomic_aligns($ma_mlss, $slice);
00194 
00195     if (scalar(@$light_genomic_aligns == 0)) {
00196     #print "no genomic_align_blocks found for this slice\n";
00197     return $scores;
00198     }
00199 
00200     #default display_size is 700
00201     if (!defined $display_size) {
00202     $display_size = 700;
00203     }
00204 
00205      #default display_mode is AVERAGE
00206     if (!defined $display_type) {
00207     $display_type = "AVERAGE";
00208     }
00209 
00210     #set up bucket object for storing bucket_size number of scores 
00211     my $bucket_size = ($slice->end-$slice->start+1)/$display_size;
00212 
00213     #default window size is the largest bucket that gives at least 
00214     #display_size values ie get speed but reasonable resolution
00215     my @window_sizes = (1, 10, 100, 500);
00216 
00217     #check if valid window_size
00218     my $found = 0;
00219     if (defined $window_size) {
00220     foreach my $win_size (@window_sizes) {
00221         if ($win_size == $window_size) {
00222         $found = 1;
00223         last;
00224         }
00225     }
00226     if (!$found) {
00227         warning("Invalid window_size $window_size");
00228         return $scores;
00229     }
00230     }
00231     
00232     if (!defined $window_size) {
00233     #set window_size to be the largest for when for loop fails
00234     $window_size = $window_sizes[scalar(@window_sizes)-1];
00235     for (my $i = 1; $i < scalar(@window_sizes); $i++) {
00236         if ($bucket_size < $window_sizes[$i]) {
00237         $window_size = $window_sizes[$i-1];
00238         last;
00239         }
00240     }
00241     }
00242 
00243     #print "window_size $window_size bucket_size $bucket_size slice length " . ($slice->end - $slice->start + 1) . "\n";
00244 
00245     $_bucket = {diff_score => 0,
00246         start_pos => 0,
00247         end_pos => 0,
00248         start_seq_region_pos => $slice->start,
00249         end_seq_region_pos => $slice->end,
00250         called => 0,
00251         cnt => 0,
00252         size => $bucket_size,
00253            current => 0};
00254     foreach my $light_genomic_align (@$light_genomic_aligns) { 
00255     my $genomic_align_block_id = $light_genomic_align->{genomic_align_block_id};
00256     my $cigar_line = $light_genomic_align->{cigar_line};
00257     my $dnafrag_start = $light_genomic_align->{dnafrag_start};
00258     my $dnafrag_end = $light_genomic_align->{dnafrag_end};
00259     my $dnafrag_strand = $light_genomic_align->{dnafrag_strand};
00260     my $gab_length = $light_genomic_align->{length};
00261 
00262     my $conservation_scores = $self->_fetch_all_by_GenomicAlignBlockId_WindowSize($genomic_align_block_id, $window_size, $PACKED);
00263     if (scalar(@$conservation_scores) == 0) {
00264         next;
00265     }
00266 
00267     #reset _score_index for new conservation_scores
00268     $_score_index = 0;
00269 
00270     #print "slice start " . $slice->start . " " . $slice->end . " " . $slice->strand . " strand $dnafrag_strand cigar " . substr($cigar_line,0, 10) . "\n";
00271 
00272     #if dnafrag_strand is -1, reverse cigar_line, scores so that they 
00273     #are always on the forward strand
00274     if ($dnafrag_strand == -1) {
00275 
00276         $cigar_line = join("", reverse ($cigar_line=~(/(\d*[GDMIX])/g)));
00277             $conservation_scores = _reverse($conservation_scores, $gab_length);
00278             $dnafrag_strand = 1;
00279     }
00280 
00281     #print "after slice start " . $slice->start . " " . $slice->end . " " . $slice->strand . " strand $dnafrag_strand cigar " . substr($cigar_line,0, 10) . "\n";
00282 
00283     #if want one score per base in the alignment, use faster method 
00284     #doesn't bother with any binning
00285     if ($window_size == 1 && ($display_size == ($slice->end - $slice->start + 1))) {
00286         $scores = _get_aligned_scores_from_cigar_line_fast($self, $cigar_line, $dnafrag_start, $dnafrag_end, $slice->start, $slice->end, $conservation_scores, $genomic_align_block_id, $gab_length, $display_type, $scores);
00287     } else {
00288         $scores = _get_aligned_scores_from_cigar_line($self, $cigar_line, $dnafrag_start, $dnafrag_end, $slice->start, $slice->end, $conservation_scores, $genomic_align_block_id, $gab_length, $display_type, $window_size, $scores);
00289         
00290     } 
00291     }
00292 
00293     if (scalar(@$scores) == 0) {
00294     return $scores;
00295     }
00296 
00297     #need to reverse scores
00298     if ($slice->strand == -1) {
00299     #reverse scores array
00300     @$scores = reverse(@$scores);
00301     #reverse positions
00302     foreach my $score (@$scores) {
00303         $score->position($slice->end - $score->seq_region_pos + 1);
00304     }
00305     #print "reverse scores\n";
00306     }
00307     
00308     #remove _no_score_values from aligned_scores array
00309     my $i = 0;
00310      while ($i < scalar(@$scores)) {
00311     if (!defined($_no_score_value) && 
00312         !defined($scores->[$i]->diff_score)) {
00313         splice @$scores, $i, 1;
00314     } elsif (defined($_no_score_value) && 
00315          $scores->[$i]->diff_score == $_no_score_value) {
00316         splice @$scores, $i, 1;
00317     } else {
00318         $i++;
00319     }
00320      }
00321 
00322     #Find the min and max scores for y axis scaling. Save in first
00323     #conservation score object
00324     my ($min_y_axis, $max_y_axis) =  _find_min_max_score($scores);
00325 
00326     #add min and max scores to the first conservation score object
00327     if ((scalar @$scores) > 0) {
00328     $scores->[0]->y_axis_min($min_y_axis);
00329     $scores->[0]->y_axis_max($max_y_axis);
00330     }
00331 
00332     return ($scores);
00333 }
00334 
00335 =head2 fetch_all_by_GenomicAlignBlock
00336 
00337   Arg  1     : Bio::EnsEMBL::Compara::GenomicAlignBlock $genomic_align_block
00338   Arg  2     : (opt) integer $align_start (default 1) 
00339   Arg  3     : (opt) integer $align_end (default $genomic_align_block->length)
00340   Arg  4     : (opt) integer $slice_length (default $genomic_align_block->length)
00341   Arg  5     : (opt) integer $display_size (default 700)
00342   Arg  6     : (opt) string $display_type (one of "AVERAGE" or "MAX") (default "AVERAGE")
00343   Arg  7     : (opt) integer $window_size
00344   Example    : my $conservation_scores =
00345                     $conservation_score_adaptor->fetch_all_by_GenomicAlignBlock($genomic_align_block, $align_start, $align_end, $slice_length, $slice_length);
00346   Description: Retrieve the corresponding
00347                Bio::EnsEMBL::Compara::ConservationScore objects. 
00348            Each conservation score object contains a position in alignment
00349                coordinates, the observed_score, the expected_score and the 
00350                diff_score (conservation score) calculated as 
00351            (expected_score - observed_score).
00352                The $align_start and $align_end parameters give the start and 
00353                end of a region within a genomic_align_block and should be in 
00354                alignment coordinates.
00355                The $slice_length is the total length of the region to be 
00356                displayed and may span several individual genomic align blocks.
00357                It is used to automatically calculate the window_size.
00358                Display_size is the number of scores that will be returned.
00359                To return a score for each column in an alignment the display_size 
00360                should be set to be the same size as the alignment length. If 
00361                the $slice_length is larger than the $display_size, the scores 
00362                will either be averaged if the display_type is "AVERAGE" or the 
00363                maximum taken if display_type is "MAXIMUM". 
00364            Window_size defines which set of pre-averaged scores to use. 
00365            Valid values are 1, 10, 100 or 500. There is no need to define 
00366                the window_size because the program will select the most 
00367                appropriate window_size to use based on the slice_length and the
00368                display_size. 
00369                Alignment positions which have no scores are not returned.
00370                The min and max y axis values for 
00371                the array of conservation score objects are set in the first 
00372                conservation score object (index 0). 
00373   Returntype : ref. to an array of Bio::EnsEMBL::Compara::ConservationScore 
00374                objects. 
00375   Caller     : object::methodname
00376   Status     : At risk
00377 
00378 =cut
00379 sub fetch_all_by_GenomicAlignBlock {
00380     my ($self, $genomic_align_block, $start, $end, $slice_length,
00381     $display_size, $display_type, $window_size) = @_;
00382 
00383 
00384     #print "fetch_all_by_GenomicAlignBlock start $start end $end\n";
00385     my $scores = [];
00386 
00387     #default display_size is 700
00388     if (!defined $display_size) {
00389     $display_size = 700;
00390     }
00391 
00392     #default display_mode is AVERAGE
00393     if (!defined $display_type) {
00394     $display_type = "AVERAGE";
00395     }
00396 
00397     #default start is 1
00398     if (!defined $start) {
00399     $start = 1;
00400     }
00401 
00402     #default end is the genomic_align_block length    
00403     if (!defined $end) {
00404     $end = $genomic_align_block->length;
00405     }
00406 
00407     #default slice_length is the genomic_align_block length
00408     if (!defined $slice_length) {
00409     $slice_length = $genomic_align_block->length;
00410     }
00411 
00412     #keep track of original start and end
00413     my $align_start = $start;
00414     my $align_end = $end;
00415 
00416     #print "restricted start " . $genomic_align_block->{'restricted_aln_start'} . " end " . $genomic_align_block->{'restricted_aln_end'} . " gab length " . $genomic_align_block->length . "\n";
00417 
00418     #If the GenomicAlignBlock has been restricted, use these values in 
00419     #align_start and align_end
00420     if (defined $genomic_align_block->{'restricted_aln_start'}) {
00421     $align_start += $genomic_align_block->{'restricted_aln_start'} - 1;
00422     }
00423     if (defined $genomic_align_block->{'restricted_aln_end'}) {
00424     
00425     $align_end = $genomic_align_block->{'restricted_aln_end'} - ($genomic_align_block->length-$align_end);
00426     }
00427 
00428     #set up bucket object for storing bucket_size number of scores 
00429     my $bucket_size = ($slice_length)/$display_size;
00430     
00431     #default window size is the largest bucket that gives at least 
00432     #display_size values ie get speed but reasonable resolution
00433     my @window_sizes = (1, 10, 100, 500);
00434 
00435     #check if valid window_size
00436     my $found = 0;
00437     if (defined $window_size) {
00438     foreach my $win_size (@window_sizes) {
00439         if ($win_size == $window_size) {
00440         $found = 1;
00441         last;
00442         }
00443     }
00444     if (!$found) {
00445         warning("Invalid window_size $window_size");
00446         return $scores;
00447     }
00448     }
00449  
00450     if (!defined $window_size) {
00451     #set window_size to be the largest for when for loop fails
00452     $window_size = $window_sizes[scalar(@window_sizes)-1];
00453     for (my $i = 1; $i < scalar(@window_sizes); $i++) {
00454         if ($bucket_size < $window_sizes[$i]) {
00455         $window_size = $window_sizes[$i-1];
00456         last;
00457         }
00458     }
00459     }
00460 
00461     $_bucket = {diff_score => 0,
00462         start_pos => 0,
00463         end_pos => 0,
00464         start_seq_region_pos => 0,
00465         end_seq_region_pos => 0,
00466         called => 0,
00467         cnt => 0,
00468         size => $bucket_size,
00469            current => 0};
00470 
00471 
00472     #make sure reference genomic align has been set. If not set, set to be
00473     #first genomic_align
00474     my $reference_genomic_align = $genomic_align_block->reference_genomic_align;
00475     if (!$reference_genomic_align) {
00476     $genomic_align_block->reference_genomic_align($genomic_align_block->get_all_GenomicAligns->[0]);
00477     }
00478     #need this in case the dbID is not set ie if the $genomic_align_block has
00479     #been restricted
00480     my $gab_id;
00481     if (defined $genomic_align_block->{'dbID'}) {
00482     $gab_id = $genomic_align_block->{'dbID'};
00483     } else {
00484     $gab_id = $genomic_align_block->{'original_dbID'};
00485     }
00486 
00487     my $conservation_scores = $self->_fetch_all_by_GenomicAlignBlockId_WindowSize($gab_id, $window_size, $PACKED);
00488 
00489 
00490     if (scalar(@$conservation_scores) == 0) {
00491     return $scores;
00492     }
00493 
00494     #need to reverse conservation scores if reference species is complemented
00495     if ($genomic_align_block->get_original_strand == 0) {
00496     $conservation_scores = _reverse($conservation_scores);
00497 
00498     }
00499     
00500     #reset _score_index for new conservation_scores
00501     $_score_index = 0;
00502 
00503     #print "align_start $align_start align_end $align_end start $start end $end\n";
00504     $scores = $self->_get_alignment_scores($conservation_scores, $align_start, 
00505                        $align_end, $display_type, $window_size, 
00506                        $genomic_align_block);
00507 
00508     if (scalar(@$scores) == 0) {
00509     return $scores;
00510     }
00511 
00512     my $all_scores;
00513     foreach my $this_conservation_score (@$scores) {
00514     $this_conservation_score->position($this_conservation_score->position + $start - 1);
00515     push (@$all_scores, $this_conservation_score);
00516     }
00517  
00518    #Find the min and max scores for y axis scaling. Save in first
00519     #conservation score object
00520     my ($min_y_axis, $max_y_axis) =  _find_min_max_score($all_scores);
00521 
00522     #add min and max scores to the first conservation score object
00523     if ((scalar @$all_scores) > 0) {
00524     $all_scores->[0]->y_axis_min($min_y_axis);
00525     $all_scores->[0]->y_axis_max($max_y_axis);
00526     }
00527    return ($all_scores);
00528 
00529 }
00530 
00531 
00532 =head2 store
00533 
00534   Arg [1]    : Bio::EnsEMBL::Compara::ConservationScore $cs
00535   Example    : $csa->store($cs);
00536   Description: Stores a conservation score object in the compara database if
00537                it has not been stored already.  
00538   Returntype : none
00539   Exceptions : thrown if $genomic_align_block is not a 
00540                Bio::EnsEMBL::Compara::GenomicAlignBlock object
00541   Exceptions : thrown if the argument is not a Bio::EnsEMBL::Compara:ConservationScore
00542   Caller     : general
00543   Status     : At risk
00544 
00545 =cut
00546 
00547 sub store {
00548   my ($self,$cs) = @_;
00549 
00550   unless(defined $cs && ref $cs && 
00551      $cs->isa('Bio::EnsEMBL::Compara::ConservationScore') ) {
00552       $self->throw("Must have conservation score arg [$cs]");
00553   }
00554 
00555   my $genomic_align_block = $cs->genomic_align_block;
00556   my $window_size = $cs->window_size;
00557   my $position = $cs->{position};
00558 
00559   #check to see if gab, window_size and position have been defined (should be unique)
00560   unless($genomic_align_block && $window_size && $position) {
00561     $self->throw("conservation score must have a genomic_align_block, window_size and position");
00562   }
00563 
00564   #check if genomic_align_block is valid
00565   if (!$genomic_align_block->isa("Bio::EnsEMBL::Compara::GenomicAlignBlock")) {
00566     throw("[$genomic_align_block] is not a Bio::EnsEMBL::Compara::GenomicAlignBlock");
00567   }
00568   my $genomic_align_block_id = $genomic_align_block->dbID;
00569 
00570   #pack the diff and expected scores if not already packed
00571   my $exp_packed;
00572   my $diff_packed;
00573   
00574   if (!$cs->packed) {
00575       my @exp_scores = split ' ',$cs->expected_score;
00576       my @diff_scores = split ' ',$cs->diff_score;
00577 
00578       #Check if big or little endian
00579       if ($is_little_endian) {
00580       for (my $i = 0; $i < scalar(@exp_scores); $i++) {
00581           $exp_packed .= pack($_pack_type, $exp_scores[$i]);
00582           $diff_packed .= pack($_pack_type, $diff_scores[$i]);
00583       }
00584       } else {
00585       #Reverse bytes before storage on big_endian machines
00586       for (my $i = 0; $i < scalar(@exp_scores); $i++) {
00587           my $exp_packed_value = reverse(pack($_pack_type, $exp_scores[$i]));
00588           my $diff_packed_value = reverse(pack($_pack_type, $diff_scores[$i]));
00589           $exp_packed .= $exp_packed_value;
00590           $diff_packed .= $diff_packed_value;
00591       }
00592       }
00593   } else {
00594       $exp_packed = $cs->expected_score;
00595       $diff_packed = $cs->diff_score;
00596   }
00597 
00598   #store the conservation score
00599   my $sql = "INSERT into conservation_score (genomic_align_block_id,window_size,position,expected_score, diff_score) ". 
00600     " VALUES ('$genomic_align_block_id','$window_size', '$position', ?, ?)";
00601   my $sth = $self->prepare($sql);
00602   $sth->execute($exp_packed, $diff_packed);
00603   
00604   #update the conservation_score object so that it's adaptor is set
00605   $cs->adaptor($self);
00606 }
00607 
00608 #Internal methods
00609 
00610 =head2 _fetch_all_by_GenomicAlignBlockId_WindowSize
00611 
00612   Arg  1     : integer $genomic_align_block_id 
00613   Arg  2     : integer $window_size
00614   Arg  3     : (opt) boolean $packed (default 0)
00615   Example    : my $conservation_scores =
00616                     $conservation_score_adaptor->_fetch_all_by_GenomicAlignBlockId(23134);
00617   Description: Retrieve the corresponding
00618                Bio::EnsEMBL::Compara::ConservationScore objects. 
00619   Returntype : ref. to an array of Bio::EnsEMBL::Compara::ConservationScore objects. If $packed is true, return the scores in a packed format given by $_pack_size and $_pack_type.
00620   Exceptions : none
00621   Caller     : general
00622 
00623 =cut
00624 
00625 sub _fetch_all_by_GenomicAlignBlockId_WindowSize {
00626     my ($self, $genomic_align_block_id, $window_size, $packed) = @_;
00627     my $conservation_scores = [];
00628     my $exp_scores;
00629     my $diff_scores;
00630     
00631     #whether to return the scores in packed or unpacked format
00632     #default to unpacked (space delimited string of floats)
00633     if (!defined $packed) {
00634     $packed = 0;
00635     }
00636 
00637     my $sql = qq{
00638     SELECT
00639         genomic_align_block_id,
00640         window_size,
00641         position,
00642         expected_score,
00643         diff_score
00644     FROM
00645         conservation_score
00646     WHERE
00647         genomic_align_block_id = ?
00648     AND
00649         window_size = ?
00650     };
00651 
00652     my $sth = $self->prepare($sql);
00653     $sth->execute($genomic_align_block_id, $window_size);
00654     my $conservation_score;
00655 
00656     while (my @values = $sth->fetchrow_array()) {
00657 
00658     if (!$packed) {
00659         $exp_scores = _unpack_scores($values[3]);
00660         $diff_scores = _unpack_scores($values[4]);
00661     } else {
00662         $exp_scores = $values[3];
00663         $diff_scores = $values[4];
00664     }
00665 
00666     $conservation_score = Bio::EnsEMBL::Compara::ConservationScore->new_fast(
00667                        {'adaptor' => $self,
00668                     'genomic_align_block_id' => $values[0],
00669                     'window_size' => $values[1],
00670                     'position' => ($values[2] or 1),
00671                     'expected_score' => $exp_scores,
00672                     'diff_score' => $diff_scores,
00673                     'packed' => $packed});
00674     push(@$conservation_scores, $conservation_score);
00675     }
00676     
00677   #sort into numerical order based on position
00678   my @sorted_scores = sort {$a->{position} <=> $b->{position}} @$conservation_scores;
00679   return \@sorted_scores;
00680 }
00681 
00682 
00683 =head2 _find_min_max_score
00684 
00685   Arg  1     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
00686   Example    : my ($min, $max) =  _find_min_max_score($scores);
00687   Description: find the min and max scores used for y axis scaling
00688   Returntype : (float, float)
00689   Exceptions :
00690   Caller     : general
00691   Status     : At risk
00692 
00693 =cut
00694 
00695 sub _find_min_max_score {
00696     my ($scores) = @_;
00697     my $min; 
00698     my $max;
00699 
00700     foreach my $score (@$scores) {
00701     #find min and max of diff scores
00702     if (defined $score->{diff_score}) {
00703         #if min hasn't been defined yet, then define min and max
00704         unless (defined $min) {
00705         $min = $score->{diff_score};
00706         $max = $score->{diff_score};
00707         }
00708         if ($min > $score->{diff_score}) {
00709         $min = $score->{diff_score};
00710         }
00711         if ($max < $score->{diff_score}) {
00712         $max = $score->{diff_score};
00713         }
00714     }
00715     }
00716 
00717     return ($min, $max);
00718 }
00719 
00720 =head2 _reverse
00721 
00722   Arg  1     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
00723   Arg  2     : int $genomic_align_block_length (number of scores)
00724   Example    : $conservation_scores = _reverse($conservation_scores);
00725   Description: reverse the conservation scores for complemented sequences
00726   Returntype : listref of Bio::EnsEMBL::Compara::ConservationScore objects
00727   Exceptions : 
00728   Caller     : general
00729   Status     : At risk
00730 
00731 =cut
00732 
00733 sub _reverse {
00734     my ($scores, $genomic_align_block_length) = @_;
00735 
00736     #reverse each conservation_score 
00737     foreach my $s (@$scores) {
00738     $s->reverse($genomic_align_block_length);
00739     }
00740     #reverse array so position values go from small to large
00741     my @rev = reverse @$scores;
00742 
00743     return \@rev;
00744 }
00745 
00746 =head2 _unpack_scores
00747 
00748   Arg  1     : string $scores
00749   Example    : $exp_scores = _unpack_scores($scores);
00750   Description: unpack score values retrieved from a database
00751   Returntype : space delimited string of floats
00752   Exceptions : none
00753   Caller     : general
00754   Status     : At risk
00755 
00756 =cut
00757 
00758 sub _unpack_scores {
00759     my ($scores) = @_;
00760     if (!defined $scores) {
00761     return "";
00762     }
00763     my $num_scores = length($scores)/$_pack_size;
00764 
00765     my $score = "";
00766     if ($is_little_endian) {
00767     for (my $i = 0; $i < $num_scores * $_pack_size; $i+=$_pack_size) {
00768         my $value = substr $scores, $i, $_pack_size;
00769         $score .= unpack($_pack_type, $value) . " ";
00770     }
00771     } else {
00772     for (my $i = 0; $i < $num_scores * $_pack_size; $i+=$_pack_size) {
00773         my $value = reverse(substr $scores, $i, $_pack_size);
00774         $score .= unpack($_pack_type, $value) . " ";
00775     }
00776     }
00777 
00778     return $score;
00779 }
00780 
00781 =head2 _unpack_score
00782 
00783   Arg  1     : string $score
00784   Example    : $exp_scores = _unpack_score($score);
00785   Description: unpack score values retrieved from a database
00786   Returntype : space delimited string of floats
00787   Exceptions : none
00788   Caller     : general
00789   Status     : At risk
00790 
00791 =cut
00792 
00793 sub _unpack_score {
00794     my ($pack_type, $score) = @_;
00795 
00796     if ($is_little_endian) {
00797     return unpack($pack_type, $score);
00798     } else {
00799     return unpack($pack_type, reverse($score));
00800     }
00801 }
00802 
00803 
00804 =head2 _find_score_index
00805 
00806   Arg  1     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
00807   Arg  2     : int $num_scores (number of scores in the array)
00808   Arg  3     : int $score_lengths number of scores in each row of the array
00809   Arg  4     : int $pos (position to find)
00810   Arg  5     : int $win_size (window size)
00811   Example    : $exp_scores = _unpack_scores($scores);
00812   Description: find the score index (row) that contains $pos in alignment coords  Returntype : int 
00813   Exceptions : none
00814   Caller     : general
00815   Status     : At risk
00816 
00817 =cut
00818 
00819 sub _find_score_index {
00820     my ($scores, $num_scores, $score_lengths, $pos, $win_size) = @_;
00821     my $i;
00822     my $length;
00823 
00824     #find the score index (row) that contains $pos in alignment coords
00825     #use global variable $_score_index to keep track of where I am in the scores 
00826     #array
00827 
00828 
00829     #special case for first window size 
00830     if ($pos < $scores->[0]->{position} && $pos > ($scores->[0]->{position} - $win_size)) {
00831     return 0;
00832     }
00833     
00834     for ($i = $_score_index; $i < $num_scores; $i++) {
00835       my $this_position = $scores->[$i]->{position};
00836     $length = ($score_lengths->[$i] - 1) * $win_size;
00837 
00838     if ($pos >= $this_position && $pos <= $this_position + $length) {
00839         $_score_index = $i;
00840         return ($i);
00841     }
00842 
00843     #smaller than end so there is no score for this position
00844     if ($pos < ($this_position + $length)) {
00845         $_score_index = $i;
00846         return -1;
00847     }
00848     }
00849     return -1;
00850 }
00851 
00852 
00853 =head2 _print_scores
00854 
00855   Arg  1     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
00856   Arg  2     : boolean $packed (0 if not packed, 1 if packed)
00857   Example    : $conservation_scores = _reverse($conservation_scores);
00858   Description: print scores (unpack first if necessary)
00859   Returntype : none
00860   Exceptions : none
00861   Caller     : general
00862   Status     : At risk
00863 
00864 =cut
00865 
00866 sub _print_scores {
00867     my ($scores, $packed) = @_;
00868     my $num_scores = scalar(@$scores);
00869     my $cnt;
00870     my ($start, $end);
00871     my $i;
00872     my @values;
00873     my $total_scores = 0;
00874 
00875     print "num scores $num_scores\n";
00876     for ($cnt = 0; $cnt < $num_scores; $cnt++) {
00877     if ($packed) {
00878         $end = (length($scores->[$cnt]->expected_score) / 4);
00879     } else {
00880         @values = split ' ', $scores->[$cnt]->diff_score;
00881         $end = scalar(@values);
00882     }
00883     print "row $cnt length $end\n";
00884     $total_scores += $end;
00885     for ($i = 0; $i < $end; $i++) {
00886         my $score;
00887         if ($packed) {
00888         my $value = substr $scores->[$cnt]->expected_score, $i*$_pack_size, $_pack_size;
00889         $score = _unpack_score($_pack_type, $value);
00890         } else {
00891         $score = $values[$i];
00892         }
00893         print "$i score $score \n";
00894     }
00895     }
00896     print "Total $total_scores\n";
00897 
00898 }
00899 
00900 =head2 _get_aligned_scores_from_cigar_line
00901 
00902   Arg  1     : string $cigar_line (cigar string from current alignment block)
00903   Arg  2     : int $start_region (start of genomic_align_block (chr coords))
00904   Arg  3     : int $end_region (end of genomic_align_block (chr coords))
00905   Arg  4     : int $start_slice (start of slice (chr coords)
00906   Arg  5     : int $end_slice (end of slice (chr coords))
00907   Arg  6     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
00908   Arg  7     : int $genomic_align_block_id (genomic align block id of current alignment block)
00909   Arg  8     : int $genomic_align_block_length (length of current alignment block)
00910   Arg  9     : string $display_type (either AVERAGE or MAX (plot average or max value))
00911   Arg 10     : int $win_size (window size used)
00912   Arg 11     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores in slice coords
00913 
00914   Example    : $scores = $self->_get_aligned_scores_from_cigar_line($genomic_align->cigar_line, $genomic_align->dnafrag_start, $genomic_align->dnafrag_end, $slice->start, $slice->end, $conservation_scores, $genomic_align_block->dbID, $genomic_align_block->length, $display_type, $window_size, $scores);
00915   Description: Convert conservation scores from alignment coordinates into species specific chromosome (slice) coordinates for an alignment genomic_align_block
00916   Returntype : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
00917   Exceptions : none
00918   Caller     : general
00919   Status     : At risk
00920 
00921 =cut
00922 
00923 sub _get_aligned_scores_from_cigar_line {
00924     my ($self, $cigar_line, $start_region, $end_region, $start_slice, $end_slice, $scores, $genomic_align_block_id, $genomic_align_block_length, $display_type, $win_size, $aligned_scores) = @_;
00925 
00926     return undef if (!$cigar_line);
00927     
00928     my $num_aligned_scores = scalar(@$aligned_scores);
00929     my @cig = ( $cigar_line =~ /(\d*[GMDIX])/g );
00930 
00931     #start and end of region in alignment coords
00932     my $align_start = 1;
00933     my $align_end = $genomic_align_block_length;
00934 
00935     my $aligned_score;
00936 
00937     my $cs_index;    #conservation score row index
00938     my $num_scores = scalar(@$scores); #number of conservation score rows
00939 
00940     #position in alignment coords to the end of cigar block
00941     my $total_pos;  
00942     #position in chromosome coords to the end of cigar block
00943     my $total_chr_pos = $start_region; 
00944 
00945     my $current_pos; #current position in alignment coords
00946     my $chr_pos = $start_region; #current position in chromosome coords
00947     my $prev_position = 0; #remember previous chr position for dealing with deletions
00948 
00949     my $cigType; #type of cigar element
00950     my $cigLength; #length of cigar element
00951 
00952     my $i;
00953     my $csBlockCnt; #offset into conservation score string
00954     my $diff_score; #store difference score
00955     my @diff_scores;
00956     my $exp_score; #store expected score
00957     my @exp_scores;
00958 
00959     #start and end of the alignment in chromosome coords
00960     my $chr_start = $start_region; 
00961     my $chr_end = $end_region;
00962     
00963     #set start and end to be the minimum of alignment or slice
00964     if ($start_slice > $start_region) {
00965     $chr_start = $start_slice;
00966     }
00967     if ($end_slice < $end_region) {
00968     $chr_end = $end_slice;
00969     }
00970 
00971     #store the number of values in each row in the score array
00972     my $score_lengths;
00973     for (my $j = 0; $j < $num_scores; $j++) {
00974     my $length = 0;
00975     if (defined($scores->[$j]->{diff_score})) {
00976         if ($PACKED) {
00977         $length = length($scores->[$j]->{diff_score})/$_pack_size;
00978         } else {
00979         my @split_scores = split ' ', $scores->[$j]->{diff_score};
00980         $length = scalar(@split_scores);
00981         }
00982     }
00983     push (@$score_lengths, $length);
00984     }
00985 
00986     #fill in region between previous alignment and this alignment with uncalled values
00987 
00988     #08.06.07 don't need to add bucket->{cnt} here
00989     #my $prev_chr_pos = $_bucket->{start_seq_region_pos}+$_bucket->{cnt};
00990     my $prev_chr_pos = $_bucket->{start_seq_region_pos};
00991 
00992     #08.06.07 Fixed bug: need to add missing values only from
00993     #the next position to chr_start otherwise you use prev_chr_pos twice.
00994     #for (my $i = $prev_chr_pos; $i < $chr_start; $i+=$win_size) {
00995 
00996     for (my $i = $prev_chr_pos+$win_size; $i < $chr_start; $i+=$win_size) {
00997     $aligned_score = _add_to_bucket($self, $display_type, $_no_score_value, $_no_score_value, $i, $start_slice, scalar(@$aligned_scores), $genomic_align_block_id, $win_size);
00998     if ($aligned_score) {
00999         #need this to ensure that the aligned_scores array is the 
01000         #correct size (passed into _add_to_bucket)
01001         push(@$aligned_scores, $aligned_score);
01002     }
01003     }
01004     
01005     #convert start_region into alignment coords and initialise total_chr_pos
01006     while ($total_chr_pos <= $chr_start) {
01007 
01008     my $cigElem = $cig[$i++];
01009 
01010     $cigType = substr( $cigElem, -1, 1 );
01011     $cigLength = substr( $cigElem, 0 ,-1 );
01012     $cigLength = 1 unless ($cigLength =~ /^\d+$/);
01013 
01014     $current_pos += $cigLength;
01015     $total_pos += $cigLength;
01016     if( $cigType eq "M" || $cigType eq "I") {
01017         $total_chr_pos += $cigLength;
01018     }
01019     }
01020     
01021     #find start of region in alignment coords 
01022     my $start_offset = $total_chr_pos - $chr_start;
01023     if ($cigType eq "M") {
01024     $align_start = (int(($total_pos - $start_offset + $win_size)/$win_size) * $win_size);
01025     }
01026 
01027     #initialise start of region in chromosome coords
01028     $chr_pos = $chr_start;
01029 
01030     #loop round in alignment coords, incrementing by win_size until either
01031     #reached the end of the alignment or end of the slice
01032     #12/03/2007 fixed bug in line below, where $chr_pos <= $chr_end. This gave
01033     #one too many scores when the last bucket position equaled the slice length
01034     #eg for slice of 1000, last bucket position = 1000, get 1001 scores but
01035     #slice of 2000, last bucket position 1999, get 1000 scores.
01036     for ($current_pos = $align_start; $current_pos <= $align_end && $chr_pos < $chr_end; $current_pos += $win_size) {
01037 
01038     #find conservation score row index containing current_pos. Returns -1
01039     #if no score found
01040     $cs_index = _find_score_index($scores, $num_scores, $score_lengths, $current_pos, $win_size);
01041 
01042     #if a score has been found, find the score in the score string and 
01043     #unpack it.
01044     unless ($cs_index == -1) {
01045         $csBlockCnt = int(($current_pos - $scores->[$cs_index]->{position})/$win_size);
01046 
01047         my $value;
01048         if ($PACKED) {
01049         $value = substr $scores->[$cs_index]->expected_score, $csBlockCnt*$_pack_size, $_pack_size;
01050         $exp_score = _unpack_score($_pack_type, $value);
01051         $value = substr $scores->[$cs_index]->{diff_score}, $csBlockCnt*$_pack_size, $_pack_size;
01052         $diff_score = _unpack_score($_pack_type, $value);
01053         } else {
01054         @exp_scores = split ' ', $scores->[$cs_index]->exp_score;
01055         $exp_score = $exp_scores[$csBlockCnt];
01056         @diff_scores = split ' ', $scores->[$cs_index]->diff_score;
01057         $diff_score = $diff_scores[$csBlockCnt];
01058         } 
01059     }
01060 
01061     #find the next cigar block that is larger than current_pos
01062     while ($total_pos < $current_pos && $chr_pos < $chr_end) {  
01063         my $cigElem = $cig[$i++];
01064         
01065         $cigType = substr( $cigElem, -1, 1 );
01066         $cigLength = substr( $cigElem, 0 ,-1 );
01067         $cigLength = 1 unless ($cigLength =~ /^\d+$/);
01068 
01069         if ($cigType ne "I") {
01070         $total_pos += $cigLength;
01071         }
01072 
01073         if( $cigType eq "M"  || $cigType eq "I") {
01074         $total_chr_pos += $cigLength;
01075         }
01076     }
01077 
01078     #total_pos is > than current_pos, so if in match, must delete this
01079     #excess 
01080     if ($cigType eq "M") {
01081         $chr_pos = $total_chr_pos - ($total_pos - $current_pos + 1);
01082     } else {
01083         $chr_pos = $total_chr_pos - 1;
01084     }
01085 
01086     #now add the scores to the bucket
01087     if ($cigType eq "M") {
01088         if ($cs_index == -1) {
01089         #in cigar match but no conservation score so add _no_score_value to the bucket
01090         $aligned_score = _add_to_bucket($self, $display_type, $_no_score_value,$_no_score_value, $chr_pos, $start_slice, scalar(@$aligned_scores), $genomic_align_block_id, $win_size);
01091         if ($aligned_score) {
01092             push(@$aligned_scores, $aligned_score);
01093         }
01094         } else {
01095         #in cigar match and have conservation score
01096 
01097         $aligned_score = _add_to_bucket($self, $display_type, $exp_score, $diff_score, $chr_pos, $start_slice, scalar(@$aligned_scores), $genomic_align_block_id, $win_size);
01098         if ($aligned_score) {
01099             push(@$aligned_scores, $aligned_score);
01100         }
01101         }
01102     } else {
01103         #not in cigar match so only add the next conservation score or
01104         #_no_score_value if this isn't a score
01105         if ($prev_position != $chr_pos) {
01106         if ($cs_index == -1) {
01107             $aligned_score = _add_to_bucket($self, $display_type, $_no_score_value, $_no_score_value, $chr_pos, $start_slice, scalar(@$aligned_scores), $genomic_align_block_id, $win_size);
01108             if ($aligned_score) {
01109             push(@$aligned_scores, $aligned_score);
01110             }
01111         } else {
01112             $aligned_score = _add_to_bucket($self, $display_type, $exp_score, $diff_score, $chr_pos, $start_slice, scalar(@$aligned_scores), $genomic_align_block_id, $win_size);
01113             if ($aligned_score) {
01114             push(@$aligned_scores, $aligned_score);
01115             }
01116         }
01117         }
01118     }
01119     $prev_position = $chr_pos;
01120     }
01121 
01122     return $aligned_scores;
01123 }
01124 
01125 =head2 _get_aligned_scores_from_cigar_line_fast
01126 
01127   Arg  1     : string $cigar_line (cigar string from current alignment block)
01128   Arg  2     : int $start_region (start of genomic_align_block (chr coords))
01129   Arg  3     : int $end_region (end of genomic_align_block (chr coords))
01130   Arg  4     : int $start_slice (start of slice (chr coords)
01131   Arg  5     : int $end_slice (end of slice (chr coords))
01132   Arg  6     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
01133   Arg  7     : int $genomic_align_block_id (genomic align block id of current alignment block)
01134   Arg  8     : int $genomic_align_block_length (length of current alignment block)
01135   Arg  9     : string $display_type (either AVERAGE or MAX (plot average or max value))
01136   Arg 10     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores in slice coords
01137 
01138   Example    : $scores = $self->_get_aligned_scores_from_cigar_line_fast($genomic_align->cigar_line, $genomic_align->dnafrag_start, $genomic_align->dnafrag_end, $slice->start, $slice->end, $conservation_scores, $genomic_align_block->dbID, $genomic_align_block->length, $display_type, $scores);
01139   Description: Faster method to than _get_aligned_scores_from_cigar_line. This
01140                method does not bin the scores and can be used if only require
01141                one score per base in the alignment
01142   Returntype : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
01143   Exceptions : none
01144   Caller     : general
01145   Status     : At risk
01146 
01147 =cut
01148 
01149 sub _get_aligned_scores_from_cigar_line_fast {
01150     my ($self, $cigar_line, $start_region, $end_region, $start_slice, $end_slice, $scores, $genomic_align_block_id, $genomic_align_block_length, $display_type, $aligned_scores) = @_;
01151 
01152     return undef if (!$cigar_line);
01153     
01154     my $num_aligned_scores = scalar(@$aligned_scores);
01155     my @cig = ( $cigar_line =~ /(\d*[GMDIX])/g );
01156 
01157     #start and end of region in alignment coords
01158     my $align_start = 1;
01159     my $align_end = $genomic_align_block_length;
01160 
01161     my $aligned_score;
01162 
01163     my $cs_index;    #conservation score row index
01164     my $num_scores = scalar(@$scores); #number of conservation score rows
01165 
01166     #position in alignment coords to the end of cigar block
01167     my $total_pos;  
01168     #position in chromosome coords to the end of cigar block
01169     my $total_chr_pos = $start_region; 
01170 
01171     my $current_pos; #current position in alignment coords
01172     my $chr_pos = $start_region; #current position in chromosome coords
01173     my $prev_position = 0; #remember previous chr position for dealing with deletions
01174 
01175     my $cigType; #type of cigar element
01176     my $cigLength; #length of cigar element
01177 
01178     my $i;
01179     my $csBlockCnt; #offset into conservation score string
01180     my $diff_score; #store difference score
01181     my @diff_scores;
01182     my $exp_score; #store expected score
01183     my @exp_scores;
01184 
01185     #start and end of the alignment in chromosome coords
01186     my $chr_start = $start_region; 
01187     my $chr_end = $end_region;
01188     
01189     #set start and end to be the minimum of alignment or slice
01190     if ($start_slice > $start_region) {
01191     $chr_start = $start_slice;
01192     }
01193     if ($end_slice < $end_region) {
01194     $chr_end = $end_slice;
01195     }
01196 
01197     #store the number of values in each row in the score array
01198     my $score_lengths;
01199     for (my $j = 0; $j < $num_scores; $j++) {
01200     my $length = 0;
01201     if (defined($scores->[$j]->diff_score)) {
01202         if ($PACKED) {
01203         $length = length($scores->[$j]->diff_score)/$_pack_size;
01204         } else {
01205         my @split_scores = split ' ', $scores->[$j]->diff_score;
01206         $length = scalar(@split_scores);
01207         }
01208     }
01209     push (@$score_lengths, $length);
01210     }
01211     #convert start_region into alignment coords and initialise total_chr_pos
01212     while ($total_chr_pos <= $chr_start) {
01213 
01214     my $cigElem = $cig[$i++];
01215 
01216     $cigType = substr( $cigElem, -1, 1 );
01217     $cigLength = substr( $cigElem, 0 ,-1 );
01218     $cigLength = 1 unless ($cigLength =~ /^\d+$/);
01219 
01220     $current_pos += $cigLength;
01221 
01222     if ($cigType ne "I") {
01223         $total_pos += $cigLength;
01224     }
01225 
01226     if( $cigType eq "M" || $cigType eq "I") {
01227         $total_chr_pos += $cigLength;
01228     }
01229     }
01230 
01231     #find start of region in alignment coords 
01232     my $start_offset = $total_chr_pos - $chr_start;
01233     if ($cigType eq "M") {
01234 #   $align_start = (int(($total_pos - $start_offset + $win_size)/$win_size) * $win_size);
01235     $align_start = $total_pos - $start_offset + 1;
01236     }
01237 
01238     #initialise start of region in chromosome coords
01239     $chr_pos = $chr_start - 1; #Need subtract 1 to allow for a single base ie chr_start=chr_end.
01240 
01241     #Initialise array of index of positions to index in the scores array
01242 
01243     my @hash_positions = ((-1) x (($align_end-$align_start+1)));
01244 
01245     for (my $i = 0; $i < @$scores; $i++) {
01246     my $score = $scores->[$i];
01247     my $length = $score_lengths->[$i] - 1;
01248     my $start = $score->{position};
01249     my $end = $score->{position} + $length;
01250 
01251     for (my $j = $start; $j <= $end; $j++) {
01252         $hash_positions[$j] = $i;
01253     }
01254     }
01255 
01256     #loop round in alignment coords, incrementing by win_size until either
01257     #reached the end of the alignment or end of the slice
01258     #12/03/2007 fixed bug in line below, where $chr_pos <= $chr_end. This gave
01259     #one too many scores when the last bucket position equaled the slice length
01260     #eg for slice of 1000, last bucket position = 1000, get 1001 scores but
01261     #slice of 2000, last bucket position 1999, get 1000 scores.
01262     for ($current_pos = $align_start; $current_pos <= $align_end && $chr_pos < $chr_end; $current_pos += 1) {
01263 
01264     #find conservation score row index containing current_pos. Returns -1
01265     #if no score found
01266     #$cs_index = _find_score_index($scores, $num_scores, $score_lengths, $current_pos, $win_size);
01267     $cs_index = $hash_positions[$current_pos];
01268     if (!defined $cs_index) {
01269         $cs_index = -1;
01270     }
01271 
01272     #if a score has been found, find the score in the score string and 
01273     #unpack it.
01274     unless ($cs_index == -1) {
01275         $csBlockCnt = $current_pos - $scores->[$cs_index]->{position};
01276 
01277         my $value;
01278         if ($PACKED) {
01279         $value = substr $scores->[$cs_index]->{expected_score}, $csBlockCnt*$_pack_size, $_pack_size;
01280         $exp_score = _unpack_score($_pack_type, $value);
01281         $value = substr $scores->[$cs_index]->{diff_score}, $csBlockCnt*$_pack_size, $_pack_size;
01282         $diff_score = _unpack_score($_pack_type, $value);
01283         } else {
01284         @exp_scores = split ' ', $scores->[$cs_index]->exp_score;
01285         $exp_score = $exp_scores[$csBlockCnt];
01286         @diff_scores = split ' ', $scores->[$cs_index]->diff_score;
01287         $diff_score = $diff_scores[$csBlockCnt];
01288         } 
01289     }
01290 
01291     #find the next cigar block that is larger than current_pos
01292     while ($total_pos < $current_pos && $chr_pos < $chr_end) {  
01293         my $cigElem = $cig[$i++];
01294         
01295         $cigType = substr( $cigElem, -1, 1 );
01296         $cigLength = substr( $cigElem, 0 ,-1 );
01297         $cigLength = 1 unless ($cigLength =~ /^\d+$/);
01298         
01299         if ($cigType ne "I") {
01300         $total_pos += $cigLength;
01301         }
01302         #if( $cigType eq "M" ) {
01303         if( $cigType eq "M" || $cigType eq "I") {
01304         $total_chr_pos += $cigLength;
01305         }
01306     }
01307 
01308     #now add the scores to the bucket
01309     if ($cigType eq "M") {
01310         $chr_pos = $total_chr_pos - ($total_pos - $current_pos + 1);
01311         #print "chr_pos=$chr_pos total_chr_pos=$total_chr_pos total_pos=$total_pos current_pos=$current_pos diff_score=$diff_score\n";
01312 
01313         if ($cs_index != -1) {
01314         #in cigar match and have conservation score
01315 
01316         #bit of a hack to turn 0's stored in the database to undefs
01317         if (defined($diff_score) && $diff_score == 0) {
01318             $diff_score = $_no_score_value;
01319             $exp_score = $_no_score_value;
01320         }
01321         #print "final pos " . ($chr_pos - $start_slice + 1) .  " $diff_score\n";
01322         $aligned_score = Bio::EnsEMBL::Compara::ConservationScore->new_fast(
01323               {'adaptor' => $self,
01324               'genomic_align_block_id' => $genomic_align_block_id,
01325               'window_size' => 1,
01326               'position' => $chr_pos - $start_slice + 1,
01327               'seq_region_pos' => $chr_pos,
01328               'diff_score' => $diff_score,
01329               'expected_score' => $exp_score}
01330               );
01331          push(@$aligned_scores, $aligned_score);
01332         }
01333     } else {
01334         $chr_pos = $total_chr_pos - 1;
01335         #not in cigar match so only add the next conservation score or
01336         #_no_score_value if this isn't a score
01337         if ($prev_position != $chr_pos) {
01338         if ($cs_index != -1) {
01339             $aligned_score = Bio::EnsEMBL::Compara::ConservationScore->new_fast(
01340               {'adaptor' => $self,
01341               'genomic_align_block_id' => $genomic_align_block_id,
01342               'window_size' => 1,
01343               'position' => $chr_pos - $start_slice + 1,
01344               'seq_region_pos' => $chr_pos,
01345               'diff_score' => $diff_score,
01346               'expected_score' => $exp_score}
01347               );
01348             push(@$aligned_scores, $aligned_score);
01349         }
01350         }
01351     }
01352     $prev_position = $chr_pos;
01353     }
01354 
01355     return $aligned_scores;
01356 }
01357 
01358 =head2 _get_alignment_scores
01359 
01360   Arg  1     : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores
01361   Arg  2     : int $align_start (start position in alignment coords)
01362   Arg  3     : int $align_end (end position in alignment coords)
01363   Arg  4     : string $display_type (either AVERAGE or MAX (plot average or max value))
01364   Arg  5     : int $win_size (window size used)
01365   Arg  6     : ref to Bio::EnsEMBL::Compara::GenomicAlignBlock object
01366   Example    : $scores = $self->_get_alignment_scores($conservation_scores, 
01367                1, 100000, "AVERAGE", 10, $genomic_align_block);
01368   Description: get scores for an alignment in alignment coordinates
01369   Returntype : listref of Bio::EnsEMBL::Compara::ConservationScore objects $scores in alignment coordinates
01370   Exceptions : none
01371   Caller     : general
01372   Status     : At risk
01373 
01374 =cut
01375 
01376 sub _get_alignment_scores {
01377     my ($self, $conservation_scores, $align_start, $align_end, $display_type, $window_size, $genomic_align_block) = @_;
01378 
01379     my $num_rows = scalar(@$conservation_scores);
01380     my @exp_scores;
01381     my $exp_score;
01382     my @diff_scores;
01383     my $diff_score;
01384     my $aligned_scores = [];
01385     my $pos;
01386 
01387     my $genomic_align = $genomic_align_block->reference_genomic_align;
01388     my $i = 0;
01389     my $total_chr_pos = $genomic_align->dnafrag_start;
01390     my $total_pos;
01391     my $start_uncalled_region = 0;
01392     my $end_uncalled_region = 0;
01393 
01394     my $score_lengths;
01395     my $start_offset = 0;
01396     my $end_offset = 0;
01397     my $start = -1; 
01398     my $end = -1;
01399 
01400     #need to find the start_offset for align_start and end_offset for align_end
01401     #in the conservation score row
01402     for (my $j = 0; $j < $num_rows; $j++) {
01403 
01404     my $length = 0;
01405     if (defined($conservation_scores->[$j]->diff_score)) {
01406         if ($PACKED) {
01407         $length = length($conservation_scores->[$j]->diff_score)/$_pack_size;
01408         } else {
01409         my @split_scores = split ' ', $conservation_scores->[$j]->diff_score;
01410         $length = scalar(@split_scores);
01411         }
01412     }
01413     $length = ($length-1) * $window_size;
01414     
01415     #special case for align_start before the first score position eg when
01416     #have window sizes > 1
01417     if ($start == -1 && $align_start < $conservation_scores->[0]->{position}) {
01418         $start = 0;
01419         $start_offset = 0;
01420     }
01421 
01422     #align_start within a called region
01423     if ($start == -1 && $align_start >= $conservation_scores->[$j]->{position} && $align_start <= $conservation_scores->[$j]->{position} + $length) {
01424         $start= $j;
01425         $start_offset= ($align_start - $conservation_scores->[$j]->{position})/$window_size;
01426     }
01427 
01428 
01429     #align_start in an uncalled region
01430     if ($start == -1 && $align_start < ($conservation_scores->[$j]->{position})) {
01431         $start= $j;
01432         $start_offset = 0;
01433         $start_uncalled_region = 1;
01434     }
01435 
01436          #align_end within a called region. And can stop
01437     if ($align_end >= $conservation_scores->[$j]->{position} && $align_end <= $conservation_scores->[$j]->{position} + $length) {
01438         $end= $j;
01439         $end_offset= int(($align_end - $conservation_scores->[$j]->{position})/$window_size);
01440         last;
01441     }
01442 
01443          #align_end within an uncalled region. And can stop
01444     if ($align_end < ($conservation_scores->[$j]->{position})) {
01445         $end= $j-1;
01446         $end_offset = 0;
01447         $end_uncalled_region = 1;
01448         last;
01449     }   
01450     }
01451     
01452     #haven't found end because it is beyond the last position in 
01453     #conservation_scores which can happen for window_sizes > 1
01454     if ($end == -1) {
01455     $end = $num_rows-1;
01456     $end_offset = int(($align_end - $conservation_scores->[$end]->{position})/$window_size);
01457     }
01458 
01459     my $genomic_align_block_id = $genomic_align_block->dbID;
01460 
01461     #go through rows $start to $end
01462     for (my $i = $start; $i <= $end; $i++) {
01463     my $num_scores = 0;
01464     if (defined($conservation_scores->[$i]->diff_score)) {
01465             if ($PACKED) {
01466                 $num_scores = length($conservation_scores->[$i]->diff_score)/$_pack_size;
01467             } else {
01468                 @exp_scores = split ' ', $conservation_scores->[$i]->exp_score;
01469                 @diff_scores = split ' ', $conservation_scores->[$i]->diff_score;
01470                 $num_scores = scalar(@diff_scores);
01471             }
01472     }
01473 
01474     #last row. If align_end is within a called region, need to recalculate
01475         #num_scores
01476     if ($i == $end && !$end_uncalled_region) {
01477         #num_scores can never be greater than scalar(@diff_scores)
01478         if ($end_offset+1 < $num_scores) {
01479         $num_scores = $end_offset+1;
01480         }
01481     }
01482     
01483     $pos = $conservation_scores->[$i]->{position};
01484 
01485     #first time round start at offset if align_start is within a called 
01486     #region
01487     for (my $j = int($start_offset); $j < $num_scores; $j++) {
01488 
01489         #increment pos by start_offset
01490         $pos += ($start_offset*$window_size);
01491 
01492         #set offset to 0 for all other rows
01493         $start_offset = 0;
01494 
01495         if ($PACKED) {
01496         my $value;
01497         $value = substr $conservation_scores->[$i]->expected_score, $j*$_pack_size, $_pack_size;
01498         $exp_score = _unpack_score($_pack_type, $value);
01499 
01500         $value = substr $conservation_scores->[$i]->diff_score, $j*$_pack_size, $_pack_size;
01501         $diff_score = _unpack_score($_pack_type, $value);
01502         } else {
01503         $exp_score = $exp_scores[$j];
01504         $diff_score = $diff_scores[$j];
01505         } 
01506 
01507         my $aligned_score = 0;
01508         $aligned_score = _add_to_bucket($self, $display_type, $exp_score, $diff_score, $pos - $align_start + 1, 1, scalar(@$aligned_scores), $genomic_align_block_id, $window_size);  
01509 
01510         if ($aligned_score) {
01511         push(@$aligned_scores, $aligned_score);
01512         }
01513         $pos+=$window_size;
01514     }
01515     #add uncalled scores for regions between called blocks
01516     my $next_pos;
01517     if ($i < $end) {
01518         $next_pos = $conservation_scores->[$i+1]->{position};
01519     } else {
01520         $next_pos = $align_end+1;
01521     }
01522       
01523     for (my $j = $pos; $j < $next_pos; $j+=$window_size) {
01524         my $aligned_score = _add_to_bucket($self, $display_type, $_no_score_value, $_no_score_value, ($j - $align_start + 1), 1, scalar(@$aligned_scores), $genomic_align_block_id, $window_size);  
01525         if ($aligned_score) {
01526         push(@$aligned_scores, $aligned_score);
01527         last;
01528         }
01529     }
01530     }
01531     #foreach my $s (@$aligned_scores) {
01532     #print STDERR "score " . $s->position . " " . $s->diff_score . "\n";
01533     #}
01534 
01535     
01536     #hack to remove zeros after they've been added. Better to not add them
01537     #in the first place (but haven't got the code working yet)
01538     #remove _no_score_values from aligned_scores array
01539     $i = 0;
01540     while ($i < scalar(@$aligned_scores)) {
01541         if (!defined($_no_score_value) && 
01542             !defined($aligned_scores->[$i]->diff_score)) {
01543         splice @$aligned_scores, $i, 1;
01544         } elsif (defined($_no_score_value) && 
01545              $aligned_scores->[$i]->diff_score == $_no_score_value) {
01546             splice @$aligned_scores, $i, 1;
01547     } else {
01548             $i++;
01549     }
01550      }
01551 
01552     #need to shift positions if align_start is in an uncalled region because
01553     #need to add the uncalled positions up to the start of the next called 
01554     #block
01555    # for (my $i = 0; $i < scalar(@$aligned_scores); $i++) {
01556 #   $aligned_scores->[$i]->{position} = $aligned_scores->[$i]->{position}-$align_start+1;  
01557  #     }
01558 
01559     return $aligned_scores;
01560 }
01561 
01562 =head2 _add_to_bucket
01563 
01564   Arg  1     : string $display_type (either AVERAGE or MAX (plot average or max value))
01565   Arg  2     : float $exp_score (expected score to be added to bucket)
01566   Arg  3     : float $diff_score (difference score to be added to bucket)
01567   Arg  4     : int $chr_pos (position in slice of reference species)
01568   Arg  5     : int $start_slice (start position of slice)
01569   Arg  6     : int $num_buckets (number of buckets used so far)
01570   Arg  7     : int $genomic_align_block_id (genomic_align_block_id of 
01571                alignment block)
01572   Arg  8     : int $win_size window size used
01573   Example    : $aligned_score = _add_to_bucket($self, "AVERAGE", $exp_score, $diff_score, $chr_pos, $start_slice, scalar(@$aligned_scores), $genomic_align_block_id, $win_size);
01574   Description: Add scores to bucket until it is full (given by size) and then 
01575                average the called scores or take the maximum (given by 
01576                display_type). Once the bucket is full, create a new 
01577                conservation score object
01578   Returntype : ref to Bio::EnsEMBL::Compara::ConservationScore object if the
01579                bucket if full or 0 if it isn't full yet
01580   Exceptions : none
01581   Caller     : general
01582   Status     : At risk
01583 
01584 =cut
01585 
01586 sub _add_to_bucket {
01587     #bucket structure:
01588     #cnt: keep track of number of scores been added
01589     #start_pos: position of first score in slice coords
01590     #start_seq_region_pos: position of first score in chr coords
01591     #exp_score: sum or max of expected scores
01592     #diff_score: sum or max of difference scores
01593     #called: number of called scores (used to average)
01594     #size: number of bases/bucket
01595 
01596     my ($self, $display_type, $exp_score, $diff_score, $chr_pos, $start_slice, $num_buckets, $genomic_align_block_id, $win_size) = @_;
01597     my $p = 0;
01598     my $s;
01599     my $final_exp_score;
01600     my $final_diff_score;
01601     my $filled_bucket = 0;
01602 
01603     #bit of a hack to turn 0's stored in the database to undefs
01604     if (defined($diff_score) && $diff_score == 0) {
01605     $diff_score = $_no_score_value;
01606     }
01607 
01608     #store start of bucket position
01609     if ($_bucket->{cnt} == 0) {
01610 
01611     $_bucket->{start_pos} = $chr_pos - $start_slice + 1;
01612     $_bucket->{start_seq_region_pos} = $chr_pos;
01613 
01614     #initialise diff_score for new bucket
01615     if ($display_type eq "AVERAGE") {
01616         $_bucket->{exp_score} = 0;
01617         $_bucket->{diff_score} = 0;
01618     } else {
01619         $_bucket->{exp_score} = $exp_score;
01620         $_bucket->{diff_score} = $diff_score;
01621     }
01622     }
01623 
01624     #convert chr_pos into slice coords
01625     my $end_pos = $chr_pos - $start_slice + 1;
01626 
01627     my $end_seq_region_pos = $chr_pos;
01628 
01629     if ($display_type eq "AVERAGE") {
01630 
01631     #store the scores
01632     if (defined $_no_score_value) {
01633         if ($diff_score != $_no_score_value) {
01634         $_bucket->{exp_score} += $exp_score;
01635         $_bucket->{diff_score} += $diff_score;
01636         $_bucket->{called}++;
01637         }
01638     } else {
01639         if (defined $diff_score) {
01640         $_bucket->{exp_score} += $exp_score;
01641         $_bucket->{diff_score} += $diff_score;
01642         $_bucket->{called}++;
01643         }
01644     }
01645 
01646     $_bucket->{cnt}++;
01647 
01648     #check to see if filled bucket NB end_pos is in slice coords
01649     #so multiply size (number of bases/bucket) by number of buckets used so
01650     #far (plus 1 because it starts at 0)
01651 
01652     my $num_align_scores = floor($end_pos/ $_bucket->{size});
01653 
01654     if ($num_align_scores > $_bucket->{current}) {
01655       $_bucket->{current} = $num_align_scores;
01656 
01657         #take average position 
01658         $p = int(($end_pos + $_bucket->{start_pos})/2);
01659         $s = int(($end_seq_region_pos + $_bucket->{start_seq_region_pos})/2);
01660         #take average score
01661         if ($_bucket->{called} == 0) {
01662         $final_exp_score  = $_no_score_value;
01663         $final_diff_score  = $_no_score_value;
01664         } else {
01665         #should average over complete bucket even if not all values are
01666         #called
01667         #$final_score = $_bucket->{diff_score}/$_bucket->{called};
01668         $final_exp_score = $_bucket->{exp_score}/$_bucket->{cnt};
01669         $final_diff_score = $_bucket->{diff_score}/$_bucket->{cnt};
01670         } 
01671         $filled_bucket = 1;
01672     }
01673     } else {
01674     #find the max score of the difference, and store the exp scores
01675     #for this too.
01676 
01677     #bucket->{diff_score} will be undefined if the first score in the
01678     #bucket is undefined.
01679     if (!defined $_bucket->{diff_score} && defined($diff_score)) {
01680         $_bucket->{diff_score} = $diff_score;
01681         $_bucket->{exp_score} = $exp_score;
01682     }
01683     if (defined($diff_score) && $_bucket->{diff_score} < $diff_score) {
01684         $_bucket->{diff_score} = $diff_score;
01685         $_bucket->{exp_score} = $exp_score;
01686     }
01687     $_bucket->{cnt}++;
01688 
01689     #check to see if filled bucket NB end_pos is in slice coords
01690     #so multiply size (number of bases/bucket) by number of buckets used so
01691     #far (plus 1 because it starts at 0)
01692     if ($end_pos >= ($_bucket->{size} * ($num_buckets+1))) {
01693         $p = int(($end_pos + $_bucket->{start_pos})/2);
01694         $s = int(($end_seq_region_pos + $_bucket->{start_seq_region_pos})/2);
01695 
01696         $final_exp_score = $_bucket->{exp_score};
01697         $final_diff_score = $_bucket->{diff_score};
01698         $filled_bucket = 1;
01699     }
01700     }
01701     #if bucket is full, create a new conservation score
01702     #if (defined $final_diff_score) {
01703     if ($filled_bucket) {
01704     my $aligned_score = Bio::EnsEMBL::Compara::ConservationScore->new_fast(
01705               {'adaptor' => $self,
01706               'genomic_align_block_id' => $genomic_align_block_id,
01707               'window_size' => $win_size,
01708               'position' => ($p or 1),
01709               'seq_region_pos' => $s,
01710               'diff_score' => $final_diff_score,
01711               'expected_score' => $final_exp_score}
01712               );
01713     
01714     $_bucket->{exp_score} = 0;
01715     $_bucket->{diff_score} = 0;
01716     $_bucket->{cnt} = 0;
01717     $_bucket->{called} = 0;
01718     $filled_bucket = 0;
01719 
01720     return $aligned_score;
01721     }
01722     #return 0 if not filled bucket
01723     return 0;
01724 }
01725 
01726 =head2 _get_all_ref_genomic_aligns
01727 
01728   Arg  1     : ref to Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
01729   Arg  2     : ref to Bio::EnsEMBL::Slice object
01730   Example    :  my $light_genomic_aligns = $self->_get_all_ref_genomic_aligns($ma_mlss, $slice);
01731   Description: Retrieve from the database some genomic_align information 
01732                relating to only the slice species. 
01733   Returntype : listref of hash containing a subset of genomic_align fields
01734   Exceptions : none
01735   Caller     : general
01736   Status     : At risk
01737 
01738 =cut
01739 
01740 sub _get_all_ref_genomic_aligns {
01741     my ($self, $mlss, $slice) = @_;
01742 
01743     my $light_genomic_aligns = []; # Returned value
01744 
01745     my $slice_adaptor = $slice->adaptor();
01746     if(!$slice_adaptor) {
01747     warning("Slice has no attached adaptor. Cannot get Compara alignments.");
01748     return $light_genomic_aligns;
01749     }
01750 
01751     my $gdb_a = $self->db->get_GenomeDBAdaptor();
01752     my $meta_container = $slice->adaptor->db->get_MetaContainer();
01753     my $primary_species_name = $gdb_a->get_species_name_from_core_MetaContainer($meta_container);
01754     my ($highest_cs) = @{$slice_adaptor->db->get_CoordSystemAdaptor->fetch_all()};
01755     my $primary_species_assembly = $highest_cs->version();
01756     my $genome_db_adaptor = $self->db->get_GenomeDBAdaptor;
01757     my $genome_db = $genome_db_adaptor->fetch_by_name_assembly(
01758                 $primary_species_name,
01759                         $primary_species_assembly);
01760     my $dnafrag_adaptor = $self->db->get_DnaFragAdaptor;
01761     my $dnafrag = $dnafrag_adaptor->fetch_by_GenomeDB_and_name(
01762                 $genome_db, $slice->seq_region_name);
01763     next if (!$dnafrag);
01764 
01765     my $max_alignment_length = $mlss->max_alignment_length;
01766     my $lower_bound = $slice->start - $max_alignment_length;
01767     
01768     my $sql = qq{
01769           SELECT 
01770              genomic_align_block_id,
01771              dnafrag_start,
01772              dnafrag_end,
01773              dnafrag_strand,
01774              cigar_line,
01775              length
01776           FROM 
01777              genomic_align 
01778           LEFT JOIN
01779              genomic_align_block
01780           USING
01781              (genomic_align_block_id)
01782           WHERE 
01783              genomic_align.method_link_species_set_id = ?
01784              AND dnafrag_id = ?
01785              AND dnafrag_start <= ?
01786              AND dnafrag_end >= ?
01787              AND dnafrag_start >= ?
01788            };
01789 
01790      my $sth = $self->prepare($sql);
01791 
01792     $sth->execute($mlss->dbID,  $dnafrag->dbID, $slice->end, $slice->start, $lower_bound);
01793 
01794     my ($genomic_align_block_id, $dnafrag_start, $dnafrag_end, $dnafrag_strand, $cigar_line, $length);
01795     $sth->bind_columns(\$genomic_align_block_id, \$dnafrag_start, \$dnafrag_end, \$dnafrag_strand, \$cigar_line, \$length);
01796 
01797     while ($sth->fetch) {
01798     my $light_genomic_align = {
01799         genomic_align_block_id => $genomic_align_block_id,
01800             dnafrag_start => $dnafrag_start,
01801             dnafrag_end => $dnafrag_end,
01802         dnafrag_strand => $dnafrag_strand,
01803             cigar_line => $cigar_line,
01804             length => $length};
01805 
01806     push @$light_genomic_aligns, $light_genomic_align;
01807     }  
01808     return $light_genomic_aligns;
01809 }
01810 
01811 
01812 1;
01813