Archive Ensembl HomeArchive Ensembl Home
HealthCheck.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::HealthCheck
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 $module->fetch_input
00017 
00018 $module->run
00019 
00020 $module->write_output
00021 
00022 =cut
00023 
00024 =head1 DESCRIPTION
00025 
00026 This module is intended to run automatic checks at the end of a pipeline (or at any other time)
00027 
00028 =head1 OPTIONS
00029 
00030 This module has been designed to run one test per job. All the options are specific to
00031 the test itself and therefore you shouldn't set any parameters in the analysis table. Use the input_id
00032 column of the job to set these values.
00033 
00034 =head2 test
00035 
00036 The name of the test to be run. See below for a list of tests
00037 
00038 =head2 params
00039 
00040 Parameters used by the test
00041 
00042 =head1 TESTS
00043 
00044 =head2 conservation_jobs
00045 
00046 This test checks that there is one conservation analysis per alignment.
00047 
00048 Parameters:
00049 
00050 =over
00051 
00052 =item logic_name
00053 
00054 Logic name for the Conservation analysis. Default: Gerp
00055 
00056 =item method_link_type (or from_method_link_type)
00057 
00058 method_link_type for the multiple alignments. Default: PECAN
00059 
00060 =back
00061 
00062 =head2 conservation_scores
00063 
00064 This test checks whether there are conservation scores in the table, whether
00065 these correspond to existing genomic_align_blocks, whether the
00066 right gerp_XXX entry exists in the meta table and whether there
00067 are no alignments with more than 3 seqs and no scores.
00068 
00069 Parameters:
00070 
00071 =over
00072 
00073 =item method_link_species_set_id (or mlss_id)
00074 
00075 Specify the method_link_species_set_id for the conservation scores. Note that this can
00076 be guessed from the database although specifying the right mlss_id is probably safer.
00077 
00078 For instance, it may happen that you expect 2 or 3 sets of scores. In that case it is
00079 recommended to create one test for each of these set. If one of the sets is missing,
00080 the test will succeed as it will successfully guess the mlss_id for the other sets and
00081 check those values only.
00082 
00083 =back
00084 
00085 =head1 EXAMPLES
00086 
00087 Here are some input_id examples:
00088 
00089 =over
00090 
00091 =item {test=>'conservation_jobs'}
00092 
00093 Run the conservation_jobs test. Default parameters
00094 
00095 =item {test=>'conservation_jobs', params=>{logic_name=>'Gerp', method_link_type=>'PECAN'}}
00096 
00097 Run the conservation_jobs test. Specify logic_name for the Conservation analysis and method_link_type
00098 for the underlying multiple alignments
00099 
00100 =item {test=>'conservation_scores'}
00101 
00102 Run the conservation_scores test. Default parameters
00103 
00104 =item {test=>'conservation_scores', params=>{mlss_id=>50002}}
00105 
00106 Run the conservation_scores test. Specify the method_link_species_set_id for the conservation scores
00107 
00108 =back
00109 
00110 =head1 CONTACT
00111 
00112 Javier Herrero <jherrero@ebi.ac.uk>
00113 
00114 =cut
00115 
00116 =head1 APPENDIX
00117 
00118 The rest of the documentation details each of the object methods.
00119 Internal methods are usually preceded with a _
00120 
00121 =cut
00122 
00123 package Bio::EnsEMBL::Compara::RunnableDB::HealthCheck;
00124 
00125 use strict;
00126 use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
00127 #use Bio::EnsEMBL::Utils::Exception;
00128 
00129 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00130 
# Input stage of the runnable. Nothing is loaded up front: every test reads
# the options it needs through $self->param() when it is executed by run().
# Always returns 1.
sub fetch_input {
  my $self = shift;

  return 1;
}
00136 
00137 
# Runs the health check named by the 'test' parameter.
#
# The test name is mapped onto the method _run_<test>_test of this object.
# When no 'test' parameter is set nothing is executed. Test methods signal
# failure by dying; this method dies itself when the requested test does not
# exist. Returns 1 otherwise.
sub run {
  my $self = shift;

  my $test_name = $self->param('test');
  return 1 if (!$test_name);

  ## Run the method called <_run_[TEST_NAME]_test>
  my $method = "_run_".$test_name."_test";
  if (!$self->can($method)) {
    die "There is no test called ".$test_name."\n";
  }

  # The message is built as ONE string: a second positional argument would
  # not be treated as message text (NOTE(review): in eHive's
  # Process::warning it is the is_error flag -- confirm for the installed
  # version).
  $self->warning("Running test ".$test_name);
  $self->$method;
  $self->warning("OK.");

  return 1;
}
00155 
# Output stage of the runnable. The tests report through warning() and die(),
# so there is nothing to store here. Always returns 1.
sub write_output {
  my $self = shift;

  return 1;
}
00161 
00162 
# Checks that the given table of the compara database holds at least one row.
#
#   $table_name : name of the table; it is interpolated verbatim into the
#                 SQL statement, so only pass trusted names.
#
# Dies when no table name is given or when the table is empty; otherwise
# reports success through $self->warning().
sub test_table {
  my $self = shift;
  my $table_name = shift;

  if (!$table_name) {
    die "Cannot test table with no name\n";
  }

  ## check the table is not empty
  my $dbh = $self->compara_dba->dbc->db_handle;
  my $row_count = $dbh->selectrow_array("SELECT COUNT(*) FROM $table_name");

  die("There are no entries in the $table_name table!\n") if ($row_count == 0);

  $self->warning("Table $table_name contains data: OK.");
}
00179 
=head2 _run_conservation_jobs_test

  Arg[1]      : -none-. The options are read with $self->param():
                  logic_name => Logic name for the Conservation Score
                      analysis. Default: Gerp
                  method_link_type => corresponds to the multiple
                      alignments. Default: PECAN
                  from_method_link_type => same as method_link_type, used
                      only when method_link_type is not set
  Example     : $self->_run_conservation_jobs_test();
  Description : Tests whether there is one conservation job per multiple
                alignment or not. This test only looks at the number of jobs
                and the number of genomic_align_blocks.
  Returntype  : -none-
  Exceptions  : die on failure (the two numbers differ, or both are 0)
  Caller      : run()

=cut

sub _run_conservation_jobs_test {
  my ($self) = @_;

  my $logic_name = $self->param('logic_name');
  $logic_name = "Gerp" if (!defined($logic_name));

  # method_link_type takes precedence over from_method_link_type
  my $method_link_type = $self->param('method_link_type');
  $method_link_type = $self->param('from_method_link_type') if (!defined($method_link_type));
  $method_link_type = "PECAN" if (!defined($method_link_type));

  my $dbh = $self->compara_dba->dbc->db_handle;

  ## Get the number of jobs for Gerp (or any other specified analysis).
  ## Inner joins are used on purpose: with a LEFT JOIN, COUNT(*) also counts
  ## the NULL-extended row of an analysis without jobs, so "no jobs" would be
  ## reported as 1. Values are bound with placeholders instead of being
  ## interpolated into the statement.
  my $count1 = $dbh->selectrow_array(
      "SELECT COUNT(*) FROM analysis JOIN job".
      " USING (analysis_id) WHERE logic_name = ?",
      undef, $logic_name);

  ## Get the number of Pecan (or any other specified method_link_type) alignments
  my $count2 = $dbh->selectrow_array(
      "SELECT COUNT(*) FROM method_link".
      " JOIN method_link_species_set USING (method_link_id)".
      " JOIN genomic_align_block USING (method_link_species_set_id)".
      " WHERE method_link.type = ?",
      undef, $method_link_type);

  if ($count1 != $count2) {
    die("There are $count1 jobs for $logic_name while there are $count2 $method_link_type alignments!\n");
  } elsif ($count1 == 0) {
    die("There are no jobs for $logic_name and no $method_link_type alignments!\n");
  }

  return;
}
00227 
00228 
=head2 _run_conservation_scores_test

  Arg[1]      : -none-. The options are read with $self->param():
                  method_link_species_set_id (or mlss_id, which takes
                      precedence) => method_link_species_set_id for the
                      conservation scores. When not set, the ids are taken
                      from the genomic_align_blocks that have scores.
  Example     : $self->_run_conservation_scores_test();
  Description : Tests whether there are conservation scores in the table, whether
                these correspond to existing genomic_align_blocks, whether the
                right gerp_XXX entry exists in the meta table and whether there
                are no alignments with more than 3 seqs and no scores.
  Returntype  : -none-
  Exceptions  : die on failure
  Caller      : run()

=cut

sub _run_conservation_scores_test {
  my ($self) = @_;

  my $method_link_species_set_id = 0;
  if (defined($self->param('method_link_species_set_id'))) {
    $method_link_species_set_id = $self->param('method_link_species_set_id');
  }
  if (defined($self->param('mlss_id'))) {
    $method_link_species_set_id = $self->param('mlss_id');
  }

  $self->test_table("conservation_score");
  $self->test_table("genomic_align_block");
  $self->test_table("meta");

  my $dbh = $self->compara_dba->dbc->db_handle;

  ## Scores pointing to a genomic_align_block that does not exist
  my $orphan_count = $dbh->selectrow_array(
      "SELECT COUNT(*) FROM conservation_score LEFT JOIN genomic_align_block ".
      " USING (genomic_align_block_id) WHERE genomic_align_block.genomic_align_block_id IS NULL");

  if ($orphan_count > 0) {
    die("There are $orphan_count orphan conservation scores!\n");
  }
  $self->warning("conservation score external references are OK.");

  my $meta_container = $self->compara_dba->get_MetaContainer();

  ## Work out the mlss_ids of the alignments the scores were computed on
  my $method_link_species_set_ids;

  if ($method_link_species_set_id) {
    # The value of the meta entry gerp_<conservation score mlss_id> is used as
    # the mlss_id of the underlying alignment.
    my ($aln_mlss_id) = @{$meta_container->list_value_by_key("gerp_".$method_link_species_set_id)};
    if (!$aln_mlss_id) {
      die "The meta table does not contain the gerp_$method_link_species_set_id entry!\n";
    }
    $method_link_species_set_ids = [$aln_mlss_id];
  } else {
    $method_link_species_set_ids = $dbh->selectcol_arrayref(
        "SELECT DISTINCT method_link_species_set_id FROM conservation_score LEFT JOIN genomic_align_block ".
        " USING (genomic_align_block_id)");
  }

  foreach my $this_method_link_species_set_id (@$method_link_species_set_ids) {
    my $gerp_key = $dbh->selectrow_array(
        "SELECT meta_key FROM meta WHERE meta_key LIKE \"gerp_%\" AND meta_value".
        " = ?",
        undef, $this_method_link_species_set_id);
    if (!$gerp_key) {
      die "There is no gerp_% entry in the meta table for mlss=".$this_method_link_species_set_id.
          " alignments!\n";
    }
    $self->warning("meta entry for $gerp_key: OK.");

    ## Blocks of this mlss with more than 3 genomic_aligns and no score at all
    my ($values) = $dbh->selectcol_arrayref(
        "SELECT genomic_align_block.genomic_align_block_id FROM genomic_align_block LEFT JOIN genomic_align".
        " ON (genomic_align_block.genomic_align_block_id = genomic_align.genomic_align_block_id)".
        " LEFT JOIN conservation_score".
        " ON (genomic_align_block.genomic_align_block_id = conservation_score.genomic_align_block_id)".
        " WHERE genomic_align_block.method_link_species_set_id = ?".
        " AND conservation_score.genomic_align_block_id IS NULL".
        " GROUP BY genomic_align_block.genomic_align_block_id HAVING count(*) > 3",
        undef, $this_method_link_species_set_id);

    if (@$values) {
      foreach my $value (@$values) {
        $self->warning("gab_id $value");
      }
      die "There are ".scalar(@$values)." blocks (mlss=".$this_method_link_species_set_id.
          ") with more than 3 seqs and no conservation score!\n";
    }
    $self->warning("All alignments for mlss=$this_method_link_species_set_id and more than 3 seqs have cons.scores: OK.");
  }

  return;
}
00317 
00318 
=head2 _run_pairwise_gabs_test

  Arg[1]      : -none-. The options are read with $self->param():
                  method_link_species_set_id (or mlss_id, which takes
                      precedence) => method_link_species_set id for the
                      pairwise alignment.
                  method_link_type => method_link_type for pairwise segment
                  genome_db_ids => array of genome_db_ids, either as an
                      arrayref or as a string such as '[1,2]'
                When neither an id nor method_link_type + genome_db_ids are
                given, every method_link_species_set_id found in the
                genomic_align_block table is tested.
  Example     : $self->_run_pairwise_gabs_test();
  Description : Tests whether the genomic_align_block and genomic_align tables
                are not empty, whether there are twice as many genomic_aligns
                as genomic_align_blocks and whether each genomic_align_block
                has two genomic_aligns.
  Returntype  : -none-
  Exceptions  : die on failure
  Caller      : run()

=cut

sub _run_pairwise_gabs_test {
  my ($self) = @_;

  # Each variable is declared exactly once. Re-declaring it with
  # "my $x = ... if (...)" for the alternative parameter name creates a new
  # variable and silently throws away the value read from the first name.
  my $method_link_species_set_id = $self->param('mlss_id');
  if (!defined($method_link_species_set_id)) {
    $method_link_species_set_id = $self->param('method_link_species_set_id');
  }
  my $method_link_type = $self->param('method_link_type');

  my $genome_db_ids = $self->param('genome_db_ids');
  if (defined($genome_db_ids) && ref($genome_db_ids) ne 'ARRAY') {
    # NOTE(review): string eval of a job parameter. It is kept for
    # compatibility with input_ids such as genome_db_ids=>'[1,2]', but it
    # executes whatever the parameter contains.
    $genome_db_ids = eval($genome_db_ids);
  }

  $self->test_table("genomic_align_block");
  $self->test_table("genomic_align");

  my $dbh = $self->compara_dba->dbc->db_handle;

  my $method_link_species_set_ids = [];
  if ($method_link_species_set_id) {
    $method_link_species_set_ids = [$method_link_species_set_id];
  } elsif ($method_link_type && $genome_db_ids) {
    my $mlss_adaptor = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;
    die "No method_link_species_set\n" if (!$mlss_adaptor);
    my $mlss = $mlss_adaptor->fetch_by_method_link_type_genome_db_ids($method_link_type, $genome_db_ids);

    if (defined $mlss) {
      $method_link_species_set_ids = [$mlss->dbID];
    } else {
      $self->warning("No method_link_species_set found for $method_link_type and the given genome_db_ids: nothing to test");
    }
  } else {
    $method_link_species_set_ids = $dbh->selectcol_arrayref(
        "SELECT DISTINCT method_link_species_set_id FROM genomic_align_block");
  }

  foreach my $this_method_link_species_set_id (@$method_link_species_set_ids) {

    ## Get the number of genomic_align_blocks
    my $count1 = $dbh->selectrow_array(
        "SELECT COUNT(*) FROM genomic_align_block WHERE method_link_species_set_id = ?",
        undef, $this_method_link_species_set_id);

    ## Get the number of genomic_aligns
    my $count2 = $dbh->selectrow_array(
        "SELECT COUNT(*) FROM genomic_align_block gab LEFT JOIN genomic_align USING (genomic_align_block_id)".
        " WHERE gab.method_link_species_set_id = ?",
        undef, $this_method_link_species_set_id);

    ## Get the number of genomic_align_blocks which don't have 2 genomic_aligns.
    ## Only the grouped column is selected: "SELECT *" together with GROUP BY
    ## is rejected when ONLY_FULL_GROUP_BY is enabled.
    my $count3 = $dbh->selectrow_array(
        "SELECT COUNT(*) FROM (SELECT genomic_align_block_id FROM genomic_align".
        " WHERE method_link_species_set_id = ?".
        " GROUP BY genomic_align_block_id HAVING COUNT(*)!=2) cnt",
        undef, $this_method_link_species_set_id);

    #get the name for the method_link_species_set_id
    my $name = $dbh->selectrow_array(
        "SELECT name FROM method_link_species_set WHERE method_link_species_set_id = ?",
        undef, $this_method_link_species_set_id);

    #should be twice as many genomic_aligns as genomic_align_blocks for
    #pairwise alignments
    if (2*$count1 != $count2) {
      die("There are $count1 genomic_align_blocks for $name while there are $count2 genomic_aligns!\n");
    }

    if ($count3 != 0) {
      die("There are $count3 genomic_align_blocks which don't have 2 genomic_aligns for $name!\n");
    }

    $self->warning("Number of genomic_align_blocks for $name = $count1");
    $self->warning("Number of genomic_aligns for $name = $count2 2*$count1=" . ($count1*2));
    $self->warning("Number of genomic_align_blocks which don't have 2 genomic_aligns for $name = $count3");
  }

  return;
}
00408 
=head2 _run_compare_to_previous_db_test

  Arg[1]      : -none-. The options are read with $self->param():
                  previous_db => location of the previous database, passed to
                      go_figure_compara_dba(). Must be defined. When it is a
                      hash without a '-dbname' key, the name defaults to
                      ensembl_compara_<prev_release>.
                  ensembl_release, prev_release => prev_release defaults to
                      ensembl_release - 1 when it is not set (or 0).
                  current_method_link_species_set_id (or current_mlss_id, or
                      mlss_id; the last one set wins) => method_link_species_set
                      id for the pairwise alignments in the current (this)
                      database.
                  previous_method_link_species_set_id (or previous_mlss_id)
                      => method_link_species_set id for the pairwise
                      alignments in the previous database. Only used when it
                      cannot be looked up from the method_link_type and the
                      species of the current alignment.
                  method_link_type => method_link_type for pairwise segment
                  current_genome_db_ids => array of genome_db_ids for current
                      (this) database, as an arrayref or a string like '[25,22]'
                  max_percentage_diff => the percentage difference between the
                      number of genomic_align_blocks in the current and the
                      previous databases before being flagged as an error.
                      Default 20.
  Example     : $self->_run_compare_to_previous_db_test();
  Description : Tests whether there are genomic_align_blocks, genomic_aligns
                and method_link_species_sets in the tables and whether the
                total number of genomic_align_blocks between 2 databases are
                within a certain percentage of each other. The test succeeds
                (with a warning) when the species or the alignment do not
                exist in the previous database.
  Returntype  : -none-
  Exceptions  : die on failure
  Caller      : run()

=cut


sub _run_compare_to_previous_db_test {
  my ($self) = @_;

  my $max_percent_diff = 20;
  $max_percent_diff = $self->param('max_percentage_diff') if (defined($self->param('max_percentage_diff')));

  # Every variable is declared once and then overridden by the alternative
  # parameter names. Re-declaring it with "my $x = ... if (...)" would create
  # a new variable and lose the value read from the long parameter name.
  my $previous_mlss_id = $self->param('previous_method_link_species_set_id');
  $previous_mlss_id = $self->param('previous_mlss_id') if (defined($self->param('previous_mlss_id')));

  my $current_mlss_id = $self->param('current_method_link_species_set_id');
  $current_mlss_id = $self->param('current_mlss_id') if (defined($self->param('current_mlss_id')));
  $current_mlss_id = $self->param('mlss_id') if (defined($self->param('mlss_id')));

  my $method_link_type = $self->param('method_link_type');

  my $current_genome_db_ids = $self->param('current_genome_db_ids');
  if (defined($current_genome_db_ids) && ref($current_genome_db_ids) ne 'ARRAY') {
    # NOTE(review): string eval of a job parameter. It is kept for
    # compatibility with input_ids such as current_genome_db_ids=>'[25,22]',
    # but it executes whatever the parameter contains.
    $current_genome_db_ids = eval($current_genome_db_ids);
  }

  my $ensembl_release = $self->param('ensembl_release');
  my $given_prev_release = $self->param('prev_release');
  if (!defined($given_prev_release) || $given_prev_release == 0) {
    $self->param('prev_release', ($ensembl_release-1));
  }

  my $previous_db = $self->param('previous_db');
  $self->throw("Must define previous database") if (!defined($previous_db));

  $self->test_table("genomic_align_block");
  $self->test_table("genomic_align");
  $self->test_table("method_link_species_set");

  #Check if $previous_db is a hash
  if ((ref($previous_db) eq "HASH") && !defined($previous_db->{'-dbname'})) {
    my $dbname = "ensembl_compara_" . $self->param('prev_release');
    $previous_db->{'-dbname'} = $dbname;
  }

  #Load previous url
  my $previous_compara_dba = $self->go_figure_compara_dba($previous_db);

  #get the previous method_link_species_set adaptor
  my $previous_mlss_adaptor = $previous_compara_dba->get_MethodLinkSpeciesSetAdaptor;
  die "No method_link_species_set\n" if (!$previous_mlss_adaptor);

  my $previous_genome_db_adaptor = $previous_compara_dba->get_GenomeDBAdaptor;
  die "No genome_db\n" if (!$previous_genome_db_adaptor);

  #get the current method_link_species_set adaptor
  my $current_mlss_adaptor = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;
  die "No method_link_species_set\n" if (!$current_mlss_adaptor);

  #get the current genome_db adaptor
  my $current_genome_db_adaptor = $self->compara_dba->get_GenomeDBAdaptor;
  die "No genome_db_adaptor\n" if (!$current_genome_db_adaptor);

  #get the current method_link_species_set object from method_link_type and
  #current genome_db_ids (the evaluated list, not the raw parameter string)
  if (defined $method_link_type && defined $current_genome_db_ids) {
    my $current_mlss = $current_mlss_adaptor->fetch_by_method_link_type_genome_db_ids($method_link_type, $current_genome_db_ids);
    if (defined $current_mlss) {
      $current_mlss_id = $current_mlss->dbID;
    }
  } elsif (defined $current_mlss_id) {
    my $mlss = $current_mlss_adaptor->fetch_by_dbID($current_mlss_id);
    die "There is no method_link_species_set with id $current_mlss_id in the current database\n" if (!$mlss);
    $method_link_type = $mlss->method_link_type;
    # NOTE(review): assumes species_set returns an arrayref of objects with a
    # dbID method, as the original code did -- confirm for the API in use.
    $current_genome_db_ids = [ map {$_->dbID} @{$mlss->species_set} ];
  } else {
    $self->throw("No current_mlss_id or method_link_type and current_genome_db_ids set\n");
  }
  die "Cannot find the method_link_species_set of the current database\n" if (!defined($current_mlss_id));

  #get the previous method_link_species_set object from the method_link_type and
  #species corresponding to the current genome_db_ids
  if (defined $method_link_type && defined $current_genome_db_ids) {
    my $previous_gdbs = [];

    #convert genome_db_ids into species names
    foreach my $g_db_id (@$current_genome_db_ids) {
      my $g_db = $current_genome_db_adaptor->fetch_by_dbID($g_db_id);

      my $previous_gdb = eval{$previous_genome_db_adaptor->fetch_by_name_assembly($g_db->name)};
      if (!$previous_gdb) {
        # New species: nothing to compare with, the test succeeds.
        $self->warning($g_db->name . " does not exist in the previous database (" . $previous_compara_dba->dbc->dbname . ")");
        return;
      }
      push @$previous_gdbs, $previous_gdb->dbID;
    }

    #find corresponding method_link_species_set in previous database
    my $previous_mlss;
    eval {
      $previous_mlss = $previous_mlss_adaptor->fetch_by_method_link_type_genome_db_ids($method_link_type, $previous_gdbs);
    };

    #Catch throw if these species do not exist in the previous database
    #and return success.
    if ($@ || !defined $previous_mlss) {
      my @names = map { $previous_genome_db_adaptor->fetch_by_dbID($_)->name() } @$previous_gdbs;
      my $message = "This pair of species (" .(join ",", @names) . ") with this method_link $method_link_type does not exist in this database " . $previous_compara_dba->dbc->dbname;
      print ($message . "\n");
      $self->warning($message);
      return;
    }
    $previous_mlss_id = $previous_mlss->dbID;
  } elsif (!defined $previous_mlss_id) {
    $self->throw("No previous_mlss_id or method_link_type and current_genome_db_ids set\n");
  }

  my $previous_dbh = $previous_compara_dba->dbc->db_handle;
  my $current_dbh = $self->compara_dba->dbc->db_handle;

  #get the name for the method_link_species_set_id
  my $previous_name = $previous_dbh->selectrow_array(
      "SELECT name FROM method_link_species_set WHERE method_link_species_set_id = ?",
      undef, $previous_mlss_id);

  my $current_name = $current_dbh->selectrow_array(
      "SELECT name FROM method_link_species_set WHERE method_link_species_set_id = ?",
      undef, $current_mlss_id);

  ## Get the number of genomic_align_blocks of previous db
  my $previous_count = $previous_dbh->selectrow_array(
      "SELECT COUNT(*) FROM genomic_align_block WHERE method_link_species_set_id = ?",
      undef, $previous_mlss_id);

  ## Get number of genomic_align_blocks of current db
  my $current_count = $current_dbh->selectrow_array(
      "SELECT COUNT(*) FROM genomic_align_block WHERE method_link_species_set_id = ?",
      undef, $current_mlss_id);

  ## A percentage cannot be computed against 0 blocks: fail with a clear
  ## message instead of an "Illegal division by zero" error.
  if (!$previous_count) {
    die("There are no genomic_align_blocks for $previous_name (mlss=$previous_mlss_id) in the previous database: cannot compare with the $current_count blocks of the current database!\n");
  }

  ## Find percentage difference between the two
  my $current_percent_diff = abs($current_count-$previous_count)/$previous_count*100;

  my $c_perc = sprintf "%.2f", $current_percent_diff;
  ## Report an error if this is higher than max_percent_diff
  if ($current_percent_diff > $max_percent_diff) {
    die("The percentage difference between the number of genomic_align_blocks of the current database of $current_name results ($current_count) and the previous database of $previous_name results ($previous_count) is $c_perc% and is greater than $max_percent_diff%!\n");
  }

  $self->warning("The percentage difference between the number of genomic_align_blocks of the current database of $current_name results ($current_count) and the previous database of $previous_name results ($previous_count) is $c_perc% and is less than $max_percent_diff%!");

}
00577 
00578 
=head2 _run_left_and_right_links_in_gat_test

  Arg[1]      : -none-
  Example     : $self->_run_left_and_right_links_in_gat_test();
  Description : Tests whether all the trees in the genomic_align_tree table
                are linked to other trees via their left and right node ids.
                A tree is reported when all the left_node_ids and all the
                right_node_ids of its nodes are 0.
  Returntype  : -none-
  Exceptions  : die on failure
  Caller      : run()

=cut

sub _run_left_and_right_links_in_gat_test {
  my ($self) = @_;
  my $table_name = "genomic_align_tree";

  ## Count the trees without any link to a neighbour. The inner query returns
  ## one row per such tree; it is wrapped in an outer COUNT(*) because
  ## selectrow_array only reads the first row, which would otherwise be the
  ## number of nodes of the first unlinked tree (or nothing at all).
  my $count = $self->compara_dba->dbc->db_handle->selectrow_array(
      "SELECT COUNT(*) FROM (".
      "SELECT gat1.node_id FROM $table_name gat1 LEFT JOIN $table_name gat2 ON (gat1.node_id = gat2.root_id)".
      " WHERE gat1.parent_id = 0 GROUP BY gat1.node_id".
      " HAVING GROUP_CONCAT(gat2.left_node_id ORDER BY gat2.left_node_id) LIKE \"0%,0\"".
      "  AND GROUP_CONCAT(gat2.right_node_id ORDER BY gat2.right_node_id) LIKE \"0%,0\"".
      ") unlinked");

  if ($count == 0) {
    $self->warning("All trees in $table_name are linked to their neighbours: OK.");
  } else {
    die("Some entries ($count) in the $table_name table are not linked!\n");
  }
}
00608 
00609 1;