Archive Ensembl HomeArchive Ensembl Home
CreateAlignmentChainsJobs.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::CreateAlignmentChainsJobs
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $db      = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
00017 my $runnableDB = Bio::EnsEMBL::Compara::RunnableDB::CreateAlignmentChainsJobs->new (
00018                                                     -input_id   => $input_id
00019                                                     -analysis   => $analysis );
00020 $runnableDB->fetch_input(); #reads from DB
00021 $runnableDB->run();
00022 $runnableDB->output();
00023 $runnableDB->write_output(); #writes to DB
00024 
00025 =cut
00026 
00027 =head1 DESCRIPTION
00028 
00029 =cut
00030 
00031 =head1 CONTACT
00032 
00033 Abel Ureta-Vidal <abel@ebi.ac.uk>
00034 
00035 =cut
00036 
00037 =head1 APPENDIX
00038 
00039 The rest of the documentation details each of the object methods.
00040 Internal methods are usually preceded with a _
00041 
00042 =cut
00043 
00044 package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::CreateAlignmentChainsJobs;
00045 
00046 use strict;
00047 
00048 use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
00049 use Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor;
00050 use Bio::EnsEMBL::Hive::Process;
00051 use Bio::EnsEMBL::Utils::Exception;
00052 
00053 our @ISA = qw(Bio::EnsEMBL::Hive::Process);
00054 
00055 #my $DEFAULT_DUMP_MIN_SIZE = 11500000;
00056 my $DEFAULT_OUTPUT_METHOD_LINK = "BLASTZ_CHAIN";
00057 
00058 sub fetch_input {
00059   my $self = shift;
00060 
00061   #
00062   # parameters which can be set either via
00063   # $self->parameters OR
00064   # $self->input_id
00065   #
00066   $self->{'query_collection'} = undef;
00067   $self->{'target_collection'} = undef;
00068   $self->{'query_genome_db'} = undef;
00069   $self->{'target_genome_db'} = undef;
00070   $self->{'method_link_species_set'} = undef;
00071   $self->{'output_method_link'} = undef;
00072 
00073   $self->get_params($self->parameters);
00074   $self->get_params($self->input_id);
00075 
00076   # create a Compara::DBAdaptor which shares my DBConnection
00077   $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);
00078   
00079   # get DnaCollection of query
00080   throw("must specify 'query_collection_name' to identify DnaCollection of query") 
00081     unless(defined($self->{'query_collection_name'}));
00082   $self->{'query_collection'} = $self->{'comparaDBA'}->get_DnaCollectionAdaptor->
00083                                 fetch_by_set_description($self->{'query_collection_name'});
00084   throw("unable to find DnaCollection with name : ". $self->{'query_collection_name'})
00085     unless(defined($self->{'query_collection'}));
00086 
00087   # get DnaCollection of target
00088   throw("must specify 'target_collection_name' to identify DnaCollection of query") 
00089     unless(defined($self->{'target_collection_name'}));
00090   $self->{'target_collection'} = $self->{'comparaDBA'}->get_DnaCollectionAdaptor->
00091                                 fetch_by_set_description($self->{'target_collection_name'});
00092   throw("unable to find DnaCollection with name : ". $self->{'target_collection_name'})
00093     unless(defined($self->{'target_collection'}));
00094 
00095   # get genome_db of query
00096   throw("must specify 'query_genome_db_id' to identify DnaCollection of query") 
00097     unless(defined($self->{'query_genome_db_id'}));
00098   $self->{'query_genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->
00099                                 fetch_by_dbID($self->{'query_genome_db_id'});
00100   throw("unable to find genome_db with dbID : ". $self->{'query_genome_db_id'})
00101     unless(defined($self->{'query_genome_db'}));
00102   
00103   # get genome_db of target
00104   throw("must specify 'target_genome_db_id' to identify DnaCollection of target") 
00105     unless(defined($self->{'target_genome_db_id'}));
00106   $self->{'target_genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->
00107                                 fetch_by_dbID($self->{'target_genome_db_id'});
00108   throw("unable to find genome_db with dbID : ". $self->{'target_genome_db_id'})
00109     unless(defined($self->{'target_genome_db'}));
00110 
00111   # get the MethodLinkSpeciesSet
00112   throw("must specify a method_link to identify a MethodLinkSpeciesSet") 
00113     unless(defined($self->{'method_link'}));
00114   if ($self->{'query_genome_db'}->dbID == $self->{'target_genome_db'}->dbID) {
00115     $self->{'method_link_species_set'} = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->fetch_by_method_link_type_genome_db_ids($self->{'method_link'}, [$self->{'query_genome_db'}->dbID] );
00116   } else {
00117     $self->{'method_link_species_set'} = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->fetch_by_method_link_type_genome_db_ids($self->{'method_link'}, [$self->{'query_genome_db'}->dbID, $self->{'target_genome_db'}->dbID] );
00118   }
00119   throw("unable to find method_link_species_set for method_link=",$self->{'method_link'}," and the following genome_db_ids ",$self->{'query_genome_db_id'},", ",$self->{'target_genome_db_id'},"\n")
00120     unless(defined($self->{'method_link_species_set'}));
00121 
00122   $self->print_params;
00123     
00124   
00125   return 1;
00126 }
00127 
00128 
00129 sub run
00130 {
00131   my $self = shift;
00132   $self->createAlignmentChainsJobs();
00133   return 1;
00134 }
00135 
00136 
00137 sub write_output
00138 {
00139   my $self = shift;
00140   my $output_id = "{\'query_genome_db_id\' => \'" . $self->{'query_genome_db_id'} . "\',\'target_genome_db_id\' => \'" . $self->{'target_genome_db_id'} . "\'}";
00141   $self->dataflow_output_id($output_id, 2);
00142   return 1;
00143 }
00144 
00145 ##################################
00146 #
00147 # subroutines
00148 #
00149 ##################################
00150 
00151 sub get_params {
00152   my $self         = shift;
00153   my $param_string = shift;
00154 
00155   return unless($param_string);
00156   print("parsing parameter string : ",$param_string,"\n");
00157   
00158   my $params = eval($param_string);
00159   return unless($params);
00160 
00161   foreach my $key (keys %$params) {
00162     print("  $key : ", $params->{$key}, "\n");
00163   }
00164 
00165   # from input_id
00166   $self->{'query_genome_db_id'} = $params->{'query_genome_db_id'} if(defined($params->{'query_genome_db_id'}));
00167   $self->{'target_genome_db_id'} = $params->{'target_genome_db_id'} if(defined($params->{'target_genome_db_id'}));
00168   $self->{'query_collection_name'} = $params->{'query_collection_name'} if(defined($params->{'query_collection_name'}));
00169   $self->{'target_collection_name'} = $params->{'target_collection_name'} if(defined($params->{'target_collection_name'}));
00170   $self->{'logic_name'} = $params->{'logic_name'} if(defined($params->{'logic_name'}));
00171 
00172   # from parameters
00173   ## method_link alias for input_method_link, for backwards compatibility
00174   $self->{'method_link'} = $params->{'method_link'} if(defined($params->{'method_link'}));
00175   $self->{'method_link'} = $params->{'input_method_link'} if(defined($params->{'input_method_link'}));
00176   $self->{'output_method_link'} = $params->{'output_method_link'} if(defined($params->{'output_method_link'}));
00177 
00178 
00179   return;
00180 }
00181 
00182 
00183 sub print_params {
00184   my $self = shift;
00185 
00186   printf(" params:\n");
00187   printf("   method_link_species_set_id : %d\n", $self->{'method_link_species_set'}->dbID);
00188   printf("   query_collection           : (%d) %s\n", 
00189          $self->{'query_collection'}->dbID, $self->{'query_collection'}->description);
00190   printf("   target_collection          : (%d) %s\n",
00191          $self->{'target_collection'}->dbID, $self->{'target_collection'}->description);
00192   printf("   query_genome_db           : (%d) %s\n", 
00193          $self->{'query_genome_db'}->dbID, $self->{'query_genome_db'}->name);
00194   printf("   target_genome_db          : (%d) %s\n",
00195          $self->{'target_genome_db'}->dbID, $self->{'target_genome_db'}->name);
00196 }
00197 
00198 
00199 sub createAlignmentChainsJobs
00200 {
00201   my $self = shift;
00202 
00203   #my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name('AlignmentChains');
00204   my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($self->{'logic_name'});
00205 
00206   my (%qy_dna_hash, %tg_dna_hash);
00207 
00208   foreach my $obj (@{$self->{'query_collection'}->get_all_dna_objects}) {
00209     my @dna_chunks;
00210     if ($obj->isa("Bio::EnsEMBL::Compara::Production::DnaFragChunkSet")) {
00211       push @dna_chunks, @{$obj->get_all_DnaFragChunks};
00212     } else {
00213       push @dna_chunks, $obj;
00214     }
00215     foreach my $chunk (@dna_chunks) {
00216       my $dnafrag = $chunk->dnafrag;
00217       if (not exists $qy_dna_hash{$dnafrag->dbID}) {
00218         $qy_dna_hash{$dnafrag->dbID} = $dnafrag;
00219       }
00220     }
00221   }
00222   my %target_dna_hash;
00223   foreach my $dna_object (@{$self->{'target_collection'}->get_all_dna_objects}) {
00224     my @dna_chunks;
00225     if ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunkSet')) {
00226       push @dna_chunks, @{$dna_object->get_all_DnaFragChunks};
00227     } else {
00228       push @dna_chunks, $dna_object;
00229     }
00230     foreach my $chunk (@dna_chunks) {
00231       my $dnafrag = $chunk->dnafrag;
00232       if (not exists $tg_dna_hash{$dnafrag->dbID}) {
00233         $tg_dna_hash{$chunk->dnafrag->dbID} = $dnafrag;
00234       }
00235     }
00236   }
00237 
00238   my $count=0;
00239 
00240   my $sql = "select g2.dnafrag_id from genomic_align g1, genomic_align g2 where g1.method_link_species_set_id = ? and g1.genomic_align_block_id=g2.genomic_align_block_id and g1.dnafrag_id = ? and g1.genomic_align_id != g2.genomic_align_id group by g2.dnafrag_id";
00241   my $sth = $self->{'comparaDBA'}->dbc->prepare($sql);
00242 
00243   my $reverse_pairs; # used to avoid getting twice the same results for self-comparisons
00244   foreach my $qy_dnafrag_id (keys %qy_dna_hash) {
00245     $sth->execute($self->{'method_link_species_set'}->dbID, $qy_dnafrag_id);
00246 
00247     my $tg_dnafrag_id;
00248     $sth->bind_columns(\$tg_dnafrag_id);
00249     while ($sth->fetch()) {
00250 
00251       next unless exists $tg_dna_hash{$tg_dnafrag_id};
00252       next if (defined($reverse_pairs->{$qy_dnafrag_id}->{$tg_dnafrag_id}));
00253       
00254       my $input_hash = {};
00255       $input_hash->{'qyDnaFragID'} = $qy_dnafrag_id;
00256       $input_hash->{'tgDnaFragID'} = $tg_dnafrag_id;
00257 
00258 
00259       if ($self->{'query_collection'}->dump_loc) {
00260         my $nib_file = $self->{'query_collection'}->dump_loc 
00261             . "/" 
00262             . $qy_dna_hash{$qy_dnafrag_id}->name 
00263             . ".nib";
00264         if (-e $nib_file) {
00265           $input_hash->{'query_nib_dir'} = $self->{'query_collection'}->dump_loc;
00266         }
00267       }
00268       if ($self->{'target_collection'}->dump_loc) {
00269         my $nib_file = $self->{'target_collection'}->dump_loc 
00270             . "/" 
00271             . $tg_dna_hash{$tg_dnafrag_id}->name
00272             . ".nib";
00273         if (-e $nib_file) {
00274           $input_hash->{'target_nib_dir'} = $self->{'target_collection'}->dump_loc;
00275         }
00276       }
00277       $reverse_pairs->{$tg_dnafrag_id}->{$qy_dnafrag_id} = 1;
00278 
00279       Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob(
00280             -input_id       => $input_hash,
00281             -analysis       => $analysis,
00282             -input_job_id   => $self->input_job->dbID,
00283       );
00284       $count++;
00285     }
00286   }
00287   $sth->finish;
00288 
00289   if ($count == 0) {
00290     # No alignments have been found. Remove the control rule to unblock following analyses
00291     my $analysis_ctrl_rule_adaptor = $self->db->get_AnalysisCtrlRuleAdaptor;
00292     $analysis_ctrl_rule_adaptor->remove_by_condition_analysis_url($analysis->logic_name);
00293     print "No jobs created. Deleting analysis ctrl rule for " . $analysis->logic_name . "\n";
00294   } else {
00295     printf("created %d jobs for AlignmentChains\n", $count);
00296   }
00297 
00298 
00299   ## Create new MethodLinkSpeciesSet
00300   # Use the value set in the parameters or the input_id
00301   my $output_method_link = $self->{'output_method_link'};
00302   # Use the output_method_link from the target analysis if the previous is not set
00303   if (!$output_method_link and $self->{'logic_name'}) {
00304     my $params = eval($analysis->parameters);
00305     if ($params->{'output_method_link'}) {
00306       $output_method_link = $params->{'output_method_link'}
00307     }
00308   }
00309   # Use the default value otherwise
00310   if (!$output_method_link) {
00311     $output_method_link = $DEFAULT_OUTPUT_METHOD_LINK;
00312   }
00313   my $new_mlss = new Bio::EnsEMBL::Compara::MethodLinkSpeciesSet(
00314       -species_set => $self->{'method_link_species_set'}->species_set,
00315       -method_link_type => $output_method_link
00316     );
00317   $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->store($new_mlss);
00318 
00319 }
00320 
00321 1;