Archive Ensembl HomeArchive Ensembl Home
HomologyGroupingFactory.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::HomologyGroupingFactory
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $aa = $sdba->get_AnalysisAdaptor;
00017 my $analysis = $aa->fetch_by_logic_name('HomologyGroupingFactory');
00018 my $rdb = Bio::EnsEMBL::Compara::RunnableDB::HomologyGroupingFactory->new(
00019                          -input_id   => [[1,2,3,14],[4,13],[11,16]],
00020                          -analysis   => $analysis);
00021 
00022 $rdb->fetch_input;
00023 $rdb->run;
00024 
00025 =cut
00026 
00027 =head1 DESCRIPTION
00028 
00029 This is a Compara homology-specific RunnableDB that, given an input of
00030 arrayrefs of genome_db_ids, creates Homology_dNdS jobs in the hive
00031 analysis_job table.
00032 
00033 =cut
00034 
00035 =head1 CONTACT
00036 
00037 abel@ebi.ac.uk, jessica@ebi.ac.uk
00038 
00039 =cut
00040 
00041 =head1 APPENDIX
00042 
00043 The rest of the documentation details each of the object methods. 
00044 Internal methods are usually preceded with a _
00045 
00046 =cut
00047 
00048 package Bio::EnsEMBL::Compara::RunnableDB::HomologyGroupingFactory;
00049 
00050 use strict;
00051 
00052 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00053 
00054 
# Default values for the parameters this module reads.
#
# Returns a reference to a freshly built hash on every call:
#   method_link_types - arrayref of method link type names used by fetch_input
#   group_size        - number of ids placed in each job by write_output
sub param_defaults {
    my %defaults = (
        'method_link_types' => ['ENSEMBL_ORTHOLOGUES'],
        'group_size'        => 20,
    );

    return \%defaults;
}
00061 
00062 
# Collects homology_ids from the 'homology' table and stores them, as an
# array reference, in the 'inputlist' parameter.
#
# Parameters read via $self->param:
#   species_sets      - obligatory; arrayref of arrayrefs of genome_db_ids
#   method_link_types - arrayref of method link type names
#
# For every species set, each single genome_db_id and each pair of
# genome_db_ids taken from that set (in list order) is looked up, for every
# method link type, through the MethodLinkSpeciesSetAdaptor.  Whenever a
# method_link_species_set is found, all homology_ids carrying its dbID are
# appended to the result.
#
# Dies if 'species_sets' is not set.
sub fetch_input {
    my $self = shift @_;

    my $species_sets      = $self->param('species_sets') or die "'species_sets' is an obligatory parameter";
    my $method_link_types = $self->param('method_link_types');

    my $mlss_adaptor = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;

    my $sql = "select homology_id from homology where method_link_species_set_id = ?";
    my $sth = $self->compara_dba->dbc->prepare($sql);

    my @homology_ids = ();

    # Appends to @homology_ids the homologies of the given genome_db_id
    # combination, trying each method link type in turn.
    my $collect_homology_ids = sub {
        my @genome_db_ids = @_;

        foreach my $mlt (@$method_link_types) {
            # A fresh array reference per lookup, so nothing the adaptor does
            # to its argument can leak into the next lookup.
            my $mlss = $mlss_adaptor->fetch_by_method_link_type_genome_db_ids($mlt, [@genome_db_ids])
                or next;

            $sth->execute($mlss->dbID);
            while (my ($homology_id) = $sth->fetchrow_array) {
                push @homology_ids, $homology_id;
            }
        }
    };

    foreach my $species_set (@$species_sets) {
        # Iterate by index over a copy: shifting ids off the original list
        # would empty the 'species_sets' parameter and would stop silently at
        # the first false id.
        my @genome_db_ids = @$species_set;

        foreach my $i (0 .. $#genome_db_ids) {
            $collect_homology_ids->($genome_db_ids[$i]);

            foreach my $j ($i + 1 .. $#genome_db_ids) {
                $collect_homology_ids->($genome_db_ids[$i], $genome_db_ids[$j]);
            }
        }
    }

    $self->param('inputlist', \@homology_ids);
}
00100 
00101 
# Splits the ids stored in the 'inputlist' parameter into consecutive groups
# of at most 'group_size' ids and flows each group, as { 'ids' => [...] },
# to branch 2.
#
# Parameters read via $self->param:
#   inputlist  - arrayref of ids; it is emptied as the groups are taken off
#   group_size - maximum number of ids per group
#
# Dies if there are ids to flow but 'group_size' is not a number >= 1.
sub write_output {
    my $self = shift @_;

    my $inputlist  = $self->param('inputlist');
    my $group_size = $self->param('group_size');

    # Nothing to flow: return before validating, so an empty list never fails.
    return unless @$inputlist;

    # splice() with a length below 1 never shortens the list to empty, so the
    # loop below would run forever; reject such a group_size up front.
    my $chunk_size = defined $group_size ? int($group_size) : 0;
    die "'group_size' must be a positive integer" if $chunk_size < 1;

    while (@$inputlist) {
        my @job_array = splice(@$inputlist, 0, $chunk_size);
        $self->dataflow_output_id( { 'ids' => [@job_array] }, 2);
    }
}
00113 
00114 1;