Archive Ensembl HomeArchive Ensembl Home
CreateAlignmentNetsJobs.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::CreateAlignmentNetsJobs
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $db      = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
00017 my $runnableDB = Bio::EnsEMBL::Pipeline::RunnableDB::CreateAlignmentNetsJobs->new (
00018                                                     -input_id   => $input_id
00019                                                     -analysis   => $analysis );
00020 $runnableDB->fetch_input(); #reads from DB
00021 $runnableDB->run();
00022 $runnableDB->output();
00023 $runnableDB->write_output(); #writes to DB
00024 
00025 =cut
00026 
00027 =head1 DESCRIPTION
00028 
00029 =cut
00030 
00031 =head1 CONTACT
00032 
00033 Abel Ureta-Vidal <abel@ebi.ac.uk>
00034 
00035 =cut
00036 
00037 =head1 APPENDIX
00038 
00039 The rest of the documentation details each of the object methods.
00040 Internal methods are usually preceded with a _
00041 
00042 =cut
00043 
00044 package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::CreateAlignmentNetsJobs;
00045 
00046 use strict;
00047 
00048 use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
00049 use Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor;
00050 use Bio::EnsEMBL::Hive::Process;
00051 use Bio::EnsEMBL::Utils::Exception;
00052 
00053 our @ISA = qw(Bio::EnsEMBL::Hive::Process);
00054 
00055 my $DEFAULT_DUMP_MIN_SIZE = 11500000;
00056 
00057 sub fetch_input {
00058   my $self = shift;
00059 
00060   #
00061   # parameters which can be set either via
00062   # $self->parameters OR
00063   # $self->input_id
00064   #
00065   $self->{'query_collection'} = undef;
00066 #  $self->{'target_collection'} = undef;
00067   $self->{'query_genome_db'} = undef;
00068   $self->{'target_genome_db'} = undef;
00069   $self->{'method_link_species_set'} = undef;
00070 
00071   $self->get_params($self->parameters);
00072   $self->get_params($self->input_id);
00073 
00074   # create a Compara::DBAdaptor which shares my DBConnection
00075   $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);
00076   
00077   # get DnaCollection of query
00078   throw("must specify 'collection_name' to identify DnaCollection of query") 
00079     unless(defined($self->{'collection_name'}));
00080   $self->{'collection'} = $self->{'comparaDBA'}->get_DnaCollectionAdaptor->
00081                                 fetch_by_set_description($self->{'collection_name'});
00082   throw("unable to find DnaCollection with name : ". $self->{'collection_name'})
00083     unless(defined($self->{'collection'}));
00084 
00085   # get genome_db of query
00086   throw("must specify 'query_genome_db_id' to identify DnaCollection of query") 
00087     unless(defined($self->{'query_genome_db_id'}));
00088   $self->{'query_genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->
00089                                 fetch_by_dbID($self->{'query_genome_db_id'});
00090   throw("unable to find genome_db with dbID : ". $self->{'query_genome_db_id'})
00091     unless(defined($self->{'query_genome_db'}));
00092   
00093   # get genome_db of target
00094   throw("must specify 'target_genome_db_id' to identify DnaCollection of target") 
00095     unless(defined($self->{'target_genome_db_id'}));
00096   $self->{'target_genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->
00097                                 fetch_by_dbID($self->{'target_genome_db_id'});
00098   throw("unable to find genome_db with dbID : ". $self->{'target_genome_db_id'})
00099     unless(defined($self->{'target_genome_db'}));
00100 
00101   # get the MethodLinkSpeciesSet
00102   throw("must specify a method_link to identify a MethodLinkSpeciesSet") 
00103     unless(defined($self->{'method_link'}));
00104   if ($self->{'query_genome_db'}->dbID == $self->{'target_genome_db'}->dbID) {
00105     $self->{'method_link_species_set'} = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->fetch_by_method_link_type_genome_db_ids($self->{'method_link'}, [$self->{'query_genome_db'}->dbID]);
00106   } else {
00107     $self->{'method_link_species_set'} = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->fetch_by_method_link_type_genome_db_ids($self->{'method_link'}, [$self->{'query_genome_db'}->dbID, $self->{'target_genome_db'}->dbID] );
00108   }
00109   throw("unable to find method_link_species_set for method_link=" . $self->{'method_link'} . " and the following genome_db_ids " . $self->{'query_genome_db_id'}. ", " .$self->{'target_genome_db_id'} . "\n")
00110     unless(defined($self->{'method_link_species_set'}));
00111 
00112   $self->print_params;
00113 
00114   return 1;
00115 }
00116 
00117 
00118 sub run
00119 {
00120   my $self = shift;
00121   $self->createAlignmentNetsJobs();
00122   return 1;
00123 }
00124 
00125 
00126 sub write_output
00127 {
00128   my $self = shift;
00129   my $output_id = "{\'query_genome_db_id\' => \'" . $self->{'query_genome_db_id'} . "\',\'target_genome_db_id\' => \'" . $self->{'target_genome_db_id'} . "\'}";
00130   $self->dataflow_output_id($output_id, 2);
00131   return 1;
00132 }
00133 
00134 ##################################
00135 #
00136 # subroutines
00137 #
00138 ##################################
00139 
00140 sub get_params {
00141   my $self         = shift;
00142   my $param_string = shift;
00143 
00144   return unless($param_string);
00145   print("parsing parameter string : ",$param_string,"\n");
00146   
00147   my $params = eval($param_string);
00148   return unless($params);
00149 
00150   foreach my $key (keys %$params) {
00151     print("  $key : ", $params->{$key}, "\n");
00152   }
00153 
00154   # from input_id
00155   #{'method_link'=>'BLASTZ_CHAIN','query_genome_db_id'=>'1','target_genome_db_id'=>'3','collection_name'=>'human for chain','logic_name'=>'AlignmentNets-29436b68','group_type'=>'chain'}
00156 
00157   $self->{'query_genome_db_id'} = $params->{'query_genome_db_id'} if(defined($params->{'query_genome_db_id'}));
00158   $self->{'target_genome_db_id'} = $params->{'target_genome_db_id'} if(defined($params->{'target_genome_db_id'}));
00159   $self->{'collection_name'} = $params->{'collection_name'} if(defined($params->{'collection_name'}));
00160   $self->{'method_link'} = $params->{'method_link'} if(defined($params->{'method_link'}));
00161   $self->{'group_type'} = $params->{'group_type'} if(defined($params->{'group_type'}));;
00162   $self->{'logic_name'} = $params->{'logic_name'} if(defined($params->{'logic_name'}));
00163 
00164   # from parameters
00165   # nothing
00166   return;
00167 }
00168 
00169 
00170 sub print_params {
00171   my $self = shift;
00172 
00173   printf(" params:\n");
00174   printf("   method_link_species_set_id : %d\n", $self->{'method_link_species_set'}->dbID);
00175   printf("   collection           : (%d) %s\n", 
00176          $self->{'collection'}->dbID, $self->{'collection'}->description);
00177   printf("   query_genome_db           : (%d) %s\n", 
00178          $self->{'query_genome_db'}->dbID, $self->{'query_genome_db'}->name);
00179   printf("   target_genome_db          : (%d) %s\n",
00180          $self->{'target_genome_db'}->dbID, $self->{'target_genome_db'}->name);
00181 }
00182 
00183 
00184 sub createAlignmentNetsJobs
00185 {
00186   my $self = shift;
00187 
00188   my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($self->{'logic_name'});
00189 
00190   my $query_dna_list  = $self->{'collection'}->get_all_dna_objects;
00191 
00192   my $count=0;
00193 #  my $sql ="select group_id,min(dnafrag_start) as min,max(dnafrag_end) as max from genomic_align ga, genomic_align_group gag where ga.genomic_align_id=gag.genomic_align_id and ga.method_link_species_set_id = ? and ga.dnafrag_id= ? and gag.type = ? group by group_id order by min asc,max asc";
00194 
00195   my $sql = "select ga.dnafrag_start, ga.dnafrag_end from genomic_align ga, genomic_align_block gab where ga.genomic_align_block_id=gab.genomic_align_block_id and ga.method_link_species_set_id= ? and ga.dnafrag_id= ? order by dnafrag_start asc, dnafrag_end asc";
00196 
00197   my $sth = $self->{'comparaDBA'}->dbc->prepare($sql);
00198 
00199   foreach my $qy_dna_object (@{$query_dna_list}) {
00200     my $qy_dnafrag_id = $qy_dna_object->dnafrag->dbID;
00201     $sth->execute($self->{'method_link_species_set'}->dbID, $qy_dnafrag_id);
00202     my ($dnafrag_start,$dnafrag_end);
00203     $sth->bind_columns(\$dnafrag_start, \$dnafrag_end);
00204     my ($slice_start,$slice_end);
00205     my @genomic_slices;
00206     while ($sth->fetch()) {
00207       unless (defined $slice_start) {
00208         ($slice_start,$slice_end) = ($dnafrag_start, $dnafrag_end);
00209         next;
00210       }
00211       if ($dnafrag_start > $slice_end) {
00212         push @genomic_slices, [$slice_start,$slice_end];
00213         ($slice_start,$slice_end) = ($dnafrag_start, $dnafrag_end);
00214       } else {
00215         if ($dnafrag_end > $slice_end) {
00216           $slice_end = $dnafrag_end;
00217         }
00218       }
00219     }
00220     $sth->finish;
00221 
00222     # Skip if no alignments are found on this slice
00223     next if (!defined $slice_start || !defined $slice_end);
00224 
00225     push @genomic_slices, [$slice_start,$slice_end];
00226 
00227     my @grouped_genomic_slices;
00228     undef $slice_start;
00229     undef $slice_end;
00230     my $max_slice_length = 500000;
00231     while (my $genomic_slices = shift @genomic_slices) {
00232       my ($start, $end) = @{$genomic_slices};
00233       unless (defined $slice_start) {
00234         ($slice_start,$slice_end) = ($start, $end);
00235         next;
00236       }
00237       my $slice_length = $slice_end - $slice_start + 1;
00238       my $length = $end - $start + 1;
00239       if ($slice_length > $max_slice_length || $slice_length + $length > $max_slice_length) {
00240         push @grouped_genomic_slices, [$slice_start,$slice_end];
00241         ($slice_start,$slice_end) = ($start, $end);
00242       } else {
00243         $slice_end = $end;
00244       }
00245     }
00246     push @grouped_genomic_slices, [$slice_start,$slice_end];
00247 
00248     while (my $genomic_slices = shift @grouped_genomic_slices) {
00249       my ($start, $end) = @{$genomic_slices};
00250       my $input_hash = {};
00251       $input_hash->{'start'} = $start;
00252       $input_hash->{'end'} = $end;
00253       $input_hash->{'DnaFragID'} = $qy_dnafrag_id;
00254       $input_hash->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID;
00255 
00256       Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob(
00257           -input_id       => $input_hash,
00258           -analysis       => $analysis,
00259           -input_job_id   => $self->input_job->dbID,
00260       );
00261       $count++;
00262     }
00263   }
00264   if ($count == 0) {
00265     # No alignments have been found. Remove the control rule to unblock following analyses
00266     my $analysis_ctrl_rule_adaptor = $self->db->get_AnalysisCtrlRuleAdaptor;
00267     $analysis_ctrl_rule_adaptor->remove_by_condition_analysis_url($analysis->logic_name);
00268     print "No jobs created. Deleting analysis ctrl rule for " . $analysis->logic_name . "\n";
00269   } else {
00270     printf("created %d jobs for AlignmentNets\n", $count);
00271   }
00272 }
00273 
00274 1;