Archive Ensembl HomeArchive Ensembl Home
UpdateMaxAlignmentLength.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =head1 NAME
00020 
00021 Bio::EnsEMBL::Compara::Production::EPOanchors::UpdateMaxAlignmentLength
00022 
00023 =cut
00024 
00025 =head1 SYNOPSIS
00026 
00027 
00028 $runnable->fetch_input(); #reads from DB
00029 $runnable->run();
00030 $runnable->write_output(); #writes to DB
00031 
00032 =cut
00033 
00034 =head1 DESCRIPTION
00035 
00036 Updates the entries in the meta table for the largest alignment length for a give method link species set.
00037 Checks for data inconsistencies in the genomic_align_block and genomic_align tables eg there are 2 genomic_aligns for each genomic_align_block. Removes any inconsistencies.
00038 
00039 =cut
00040 
00041 =head1 CONTACT
00042 
00043 Describe contact details here
00044 
00045 =cut
00046 
00047 =head1 APPENDIX
00048 
00049 The rest of the documentation details each of the object methods.
00050 Internal methods are usually preceded with a _
00051 
00052 =cut
00053 
00054 package Bio::EnsEMBL::Compara::Production::EPOanchors::UpdateMaxAlignmentLength;
00055 
00056 use strict;
00057 use Time::HiRes qw(time gettimeofday tv_interval);
00058 use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
00059 
00060 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00061 
00062 =head2 fetch_input
00063 
00064     Title   :   fetch_input
00065     Usage   :   $self->fetch_input
00066     Function:   prepares global variables and DB connections
00067     Returns :   none
00068     Args    :   none
00069 
00070 =cut
00071 
00072 sub fetch_input {
00073   my( $self) = @_;
00074 
00075   $self->compara_dba->dbc->disconnect_when_inactive(0);
00076 
00077   if (defined $self->param('output_method_link') && defined $self->param('query_genome_db_id') && $self->param('target_genome_db_id')) {
00078     my $mlssa = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;
00079     my $mlss = $mlssa->fetch_by_method_link_type_genome_db_ids($self->param('output_method_link'), [$self->param('query_genome_db_id'),$self->param('target_genome_db_id')]);
00080 
00081     if (defined $mlss && !defined $self->param('method_link_species_set_id')) {
00082     $self->param('method_link_species_set_id', $mlss->dbID);
00083     }
00084   }
00085 
00086   return 1;
00087 }
00088 
00089 
00090 sub run
00091 {
00092   my $self = shift;
00093 #  $self->remove_alignment_data_inconsistencies;
00094   $self->update_meta_table;
00095   return 1;
00096 }
00097 
00098 
00099 sub write_output
00100 {
00101   my $self = shift;
00102   return 1;
00103 }
00104 
00105 
00106 ######################################
00107 #
00108 # subroutines
00109 #
00110 #####################################
00111 
00112 sub update_meta_table {
00113   my $self = shift;
00114 
00115   my $dba = $self->compara_dba;
00116   my $mc = $dba->get_MetaContainer;
00117 
00118   $dba->dbc->do("analyze table genomic_align_block");
00119   $dba->dbc->do("analyze table genomic_align");
00120 
00121   #Don't like doing this but it looks to be the only way to avoid going mad WRT where & and clauses
00122   my @args;
00123   my ($mlss_where_clause, $mlss_and_clause) = ('','');
00124   if ($self->param('method_link_species_set_id')) {
00125     $mlss_where_clause = ' WHERE gab.method_link_species_set_id =? ';
00126     $mlss_and_clause = ' AND gab.method_link_species_set_id =? ';
00127     push(@args, $self->param('method_link_species_set_id'));
00128   }
00129 
00130   my $sql;
00131   if ($self->param('quick')) {
00132       $sql = "SELECT gab.method_link_species_set_id, max(gab.length) FROM genomic_align_block gab ${mlss_where_clause} GROUP BY gab.method_link_species_set_id";
00133   } else {
00134     $sql = "SELECT ga.method_link_species_set_id, max(ga.dnafrag_end - ga.dnafrag_start + 1) FROM genomic_align_block gab, genomic_align ga WHERE gab.genomic_align_block_id = ga.genomic_align_block_id ${mlss_and_clause} GROUP BY ga.method_link_species_set_id";
00135   }
00136 
00137   print "Running: ${sql}\n" if $self->debug();
00138 
00139   my $sth = $dba->dbc->prepare($sql);
00140 
00141   $sth->execute(@args);
00142 
00143   my $max_alignment_length = 0;
00144   my ($method_link_species_set_id,$max_align);
00145   $sth->bind_columns(\$method_link_species_set_id,\$max_align);
00146 
00147   while ($sth->fetch()) {
00148     my $key = "max_align_".$method_link_species_set_id;
00149     $mc->delete_key($key);
00150     $mc->store_key_value($key, $max_align + 1);
00151     $max_alignment_length = $max_align if ($max_align > $max_alignment_length);
00152     print STDERR "Stored key:$key value:",$max_align + 1," in meta table\n";
00153   }
00154   $mc->delete_key("max_alignment_length");
00155   $mc->store_key_value("max_alignment_length", $max_alignment_length + 1);
00156   print STDERR "Stored key:max_alignment_length value:",$max_alignment_length + 1," in meta table\n";
00157 
00158   $sth->finish;
00159 
00160 }
00161 
00162 sub remove_alignment_data_inconsistencies {
00163   my $self = shift;
00164 
00165   my $dba = $self->compara_dba;
00166 
00167   $dba->dbc->do("analyze table genomic_align_block");
00168   $dba->dbc->do("analyze table genomic_align");
00169   $dba->dbc->do("analyze table genomic_align_group");
00170 
00171   #Delete genomic align blocks which have no genomic aligns. Assume not many of these
00172   #
00173 
00174   my $sql_gab = "delete from genomic_align_block where genomic_align_block_id in ";
00175   my $sql_ga = "delete from genomic_align where genomic_align_id in ";
00176 #  my $sql_gag = "delete from genomic_align_group where genomic_align_id in ";
00177 
00178   my $gab_sel = '';
00179   my @gab_args;
00180   if($self->param('method_link_species_set_id')) {
00181     $gab_sel = 'AND gab.method_link_species_set_id =?';
00182     push(@gab_args, $self->param('method_link_species_set_id'));
00183   }
00184   my $sql = "SELECT gab.genomic_align_block_id FROM genomic_align_block gab LEFT JOIN genomic_align ga ON gab.genomic_align_block_id=ga.genomic_align_block_id WHERE ga.genomic_align_block_id IS NULL ${gab_sel}";
00185 
00186     print "Running: ${sql}\n" if $self->debug();
00187 
00188   my $sth = $dba->dbc->prepare($sql);
00189   $sth->execute(@gab_args);
00190 
00191   my @gab_ids;
00192   while (my $aref = $sth->fetchrow_arrayref) {
00193     my ($gab_id) = @$aref;
00194     push @gab_ids, $gab_id;
00195   }
00196   $sth->finish;
00197 
00198   #check if any results found
00199   if (scalar @gab_ids) {
00200     my $sql_gab_to_exec = $sql_gab . "(" . join(",", @gab_ids) . ");";
00201     my $sth = $dba->dbc->prepare($sql_gab_to_exec);
00202     $sth->execute;
00203     $sth->finish;
00204   }
00205 
00206   #
00207   #Delete genomic align blocks which have 1 genomic align. Assume not many of these
00208   #
00209   my @del_args;
00210   if($self->param('method_link_species_set_id')) {
00211     $sql = 'SELECT gab.genomic_align_block_id, ga.genomic_align_id FROM genomic_align_block gab LEFT JOIN genomic_align ga USING (genomic_align_block_id) WHERE gab.method_link_species_set_id =? GROUP BY genomic_align_block_id HAVING count(*)<2';
00212     push(@del_args, $self->param('method_link_species_set_id'));
00213   }
00214   else {
00215     $sql = 'SELECT genomic_align_block_id, genomic_align_id FROM genomic_align GROUP BY genomic_align_block_id HAVING count(*)<2';
00216   }
00217 
00218   print "Running: ${sql}\n" if $self->debug();
00219 
00220   $sth = $dba->dbc->prepare($sql);
00221   $sth->execute(@del_args);
00222 
00223   @gab_ids = ();
00224   my @ga_ids;
00225   while (my $aref = $sth->fetchrow_arrayref) {
00226     my ($gab_id, $ga_id) = @$aref;
00227     push @gab_ids, $gab_id;
00228     push @ga_ids, $ga_id;
00229   }
00230   $sth->finish;
00231 
00232   if (scalar @gab_ids) {
00233     my $sql_gab_to_exec = $sql_gab . "(" . join(",", @gab_ids) . ")";
00234     my $sql_ga_to_exec = $sql_ga . "(" . join(",", @ga_ids) . ")";
00235 #    my $sql_gag_to_exec = $sql_gag . "(" . join(",", @ga_ids) . ")";
00236 
00237     foreach my $sql ($sql_ga_to_exec,$sql_gab_to_exec) {#,$sql_gag_to_exec) {
00238       my $sth = $dba->dbc->prepare($sql);
00239       $sth->execute;
00240       $sth->finish;
00241     }
00242   }
00243 }
00244 
00245 
00246 1;