Archive Ensembl Home
RemoveAlignmentDataInconsistencies.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =head1 NAME
00020 
00021 Bio::EnsEMBL::Compara::RunnableDB::PairAligner::RemoveAlignmentDataInconsistencies
00022 
00023 =cut
00024 
00025 =head1 SYNOPSIS
00026 
00027 
00028 $runnable->fetch_input(); #reads from DB
00029 $runnable->run();
00030 $runnable->write_output(); #writes to DB
00031 
00032 =cut
00033 
00034 =head1 DESCRIPTION
00035 
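00036 Checks for data inconsistencies in the genomic_align_block and genomic_align tables, e.g. that every genomic_align_block has at least 2 genomic_aligns. Any genomic_align_block with fewer than 2 genomic_aligns is deleted, together with its single genomic_align if it has one.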
00037 
00038 =cut
00039 
00040 =head1 CONTACT
00041 
00042 Please email comments or questions to the public Ensembl developers list at <dev@ensembl.org>.
00043 
00044 =cut
00045 
00046 =head1 APPENDIX
00047 
00048 The rest of the documentation details each of the object methods.
00049 Internal methods are usually preceded with a _
00050 
00051 =cut
00052 
00053 package Bio::EnsEMBL::Compara::RunnableDB::PairAligner::RemoveAlignmentDataInconsistencies;
00054 
00055 use strict;
00056 use Time::HiRes qw(time gettimeofday tv_interval);
00057 use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
00058 
00059 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00060 
00061 =head2 fetch_input
00062 
00063     Title   :   fetch_input
00064     Usage   :   $self->fetch_input
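00065     Function:   disables disconnect_when_inactive on the compara DB connection and, when output_method_link, query_genome_db_id and target_genome_db_id are set, fills in method_link_species_set_id if it is not already defined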
00066     Returns :   none
00067     Args    :   none
00068 
00069 =cut
00070 
# Prepares the job: switches off disconnect_when_inactive on the compara
# connection and, when the aligner parameters are present, looks up the
# matching method_link_species_set and stores its dbID in the
# 'method_link_species_set_id' parameter (unless that is already defined).
# Always returns 1.
sub fetch_input {
  my $self = shift;

  $self->compara_dba->dbc->disconnect_when_inactive(0);

  # The lookup needs the method link type and both genome_db ids.
  my $can_lookup_mlss = defined $self->param('output_method_link')
                     && defined $self->param('query_genome_db_id')
                     && $self->param('target_genome_db_id');
  return 1 unless $can_lookup_mlss;

  my $mlss_adaptor = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;
  my $mlss = $mlss_adaptor->fetch_by_method_link_type_genome_db_ids(
    $self->param('output_method_link'),
    [ $self->param('query_genome_db_id'), $self->param('target_genome_db_id') ],
  );

  # Nothing to record when no set matched.
  return 1 unless defined $mlss;

  # An explicitly supplied method_link_species_set_id takes precedence.
  return 1 if defined $self->param('method_link_species_set_id');

  $self->param('method_link_species_set_id', $mlss->dbID);

  return 1;
}
00087 
00088 
# Runs the clean-up of the genomic_align_block / genomic_align tables by
# delegating to remove_alignment_data_inconsistencies. Always returns 1.
sub run {
  my ($self) = @_;

  $self->remove_alignment_data_inconsistencies();

  return 1;
}
00095 
00096 
# Nothing is written here: all database changes are made during run().
# Always returns 1.
sub write_output {
  my ($self) = @_;

  return 1;
}
00102 
00103 
00104 ######################################
00105 #
00106 # subroutines
00107 #
00108 #####################################
00109 
# Deletes every genomic_align_block that does not have at least two
# genomic_align rows, together with the single genomic_align of blocks that
# have exactly one.
#
# Args    : none. Reads the optional 'method_link_species_set_id' parameter;
#           when it is set (true), only blocks of that set are examined.
# Returns : nothing (empty list / undef).
# Effects : runs "analyze table" on both tables, issues DELETE statements and
#           reports the number of affected blocks through $self->warning.
sub remove_alignment_data_inconsistencies {
  my $self = shift;

  my $dbc     = $self->compara_dba->dbc;
  my $mlss_id = $self->param('method_link_species_set_id');

  $dbc->do("analyze table genomic_align_block");
  $dbc->do("analyze table genomic_align");

  # Deletes the rows of $table whose $id_column matches one of @ids.
  # The ids are bound through placeholders and sent in bounded batches so a
  # large number of inconsistencies cannot produce one oversized statement.
  my $batch_size    = 1000;
  my $delete_by_ids = sub {
    my ($table, $id_column, @ids) = @_;
    while (my @batch = splice(@ids, 0, $batch_size)) {
      my $placeholders = join(",", ("?") x scalar(@batch));
      my $delete_sth = $dbc->prepare("delete from $table where $id_column in ($placeholders)");
      $delete_sth->execute(@batch);
      $delete_sth->finish;
    }
  };

  #
  #Delete genomic align blocks which have no genomic aligns.
  #
  my $sql = "SELECT gab.genomic_align_block_id FROM genomic_align_block gab LEFT JOIN genomic_align ga ON gab.genomic_align_block_id=ga.genomic_align_block_id WHERE ga.genomic_align_block_id IS NULL";
  my @select_args;
  if ($mlss_id) {
    $sql .= " AND gab.method_link_species_set_id =?";
    push(@select_args, $mlss_id);
  }

  print "Running: ${sql}\n" if $self->debug();

  my $sth = $dbc->prepare($sql);
  $sth->execute(@select_args);

  my @empty_gab_ids;
  while (my $row = $sth->fetchrow_arrayref) {
    push @empty_gab_ids, $row->[0];
  }
  $sth->finish;

  if (scalar @empty_gab_ids) {
    $self->warning("Found " . scalar @empty_gab_ids . " genomic_align_blocks with no genomic_aligns");
    $delete_by_ids->('genomic_align_block', 'genomic_align_block_id', @empty_gab_ids);
  }

  #
  #Delete genomic align blocks which have 1 genomic align.
  #
  # Each selected group holds a single row, so MIN() returns that row's
  # genomic_align_id while keeping the query valid under ONLY_FULL_GROUP_BY.
  @select_args = ();
  if ($mlss_id) {
    $sql = 'SELECT gab.genomic_align_block_id, MIN(ga.genomic_align_id) FROM genomic_align_block gab LEFT JOIN genomic_align ga USING (genomic_align_block_id) WHERE gab.method_link_species_set_id =? GROUP BY gab.genomic_align_block_id HAVING count(*)<2';
    push(@select_args, $mlss_id);
  }
  else {
    $sql = 'SELECT genomic_align_block_id, MIN(genomic_align_id) FROM genomic_align GROUP BY genomic_align_block_id HAVING count(*)<2';
  }

  print "Running: ${sql}\n" if $self->debug();

  $sth = $dbc->prepare($sql);
  $sth->execute(@select_args);

  my @single_gab_ids;
  my @single_ga_ids;
  while (my $row = $sth->fetchrow_arrayref) {
    my ($gab_id, $ga_id) = @$row;
    push @single_gab_ids, $gab_id;
    # The LEFT JOIN form yields a NULL genomic_align_id for a block without
    # any genomic_align; there is no genomic_align row to delete in that case.
    push @single_ga_ids, $ga_id if defined $ga_id;
  }
  $sth->finish;

  if (scalar @single_gab_ids) {
    $self->warning("Found " . scalar @single_gab_ids . " genomic_align_blocks with only one genomic_align.");

    # Remove the genomic_aligns first, then the blocks that owned them.
    $delete_by_ids->('genomic_align',       'genomic_align_id',       @single_ga_ids);
    $delete_by_ids->('genomic_align_block', 'genomic_align_block_id', @single_gab_ids);
  }

  return;
}
00193 
00194 
00195 1;