Archive Ensembl Home
PeptideMemberGroupingFactory.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::PeptideMemberGroupingFactory
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $aa = $sdba->get_AnalysisAdaptor;
00017 my $analysis = $aa->fetch_by_logic_name('PeptideMemberGroupingFactory');
00018 my $rdb = Bio::EnsEMBL::Compara::RunnableDB::PeptideMemberGroupingFactory->new();
00019 $rdb->fetch_input;
00020 $rdb->run;
00021 
00022 =cut
00023 
00024 =head1 DESCRIPTION
00025 
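00026 Given a 'genome_db_id' or a 'species_set' list of genome_db_ids, dataflows a fan of jobs carrying ENSEMBLPEP member_ids.
00027 Each job contains at most 'group_size' (20 by default) member_ids; ids are collected in species_set order, so a group may span two genome_dbs.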
00028 
00029 =cut
00030 
00031 =head1 APPENDIX
00032 
00033 The rest of the documentation details each of the object methods. 
00034 Internal methods are usually preceded with a _
00035 
00036 =cut
00037 
00038 package Bio::EnsEMBL::Compara::RunnableDB::PeptideMemberGroupingFactory;
00039 
00040 use strict;
00041 
00042 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00043 
00044 
# Default parameter values for this runnable.
#
# Returns a reference to a newly built hash on every call:
#   'group_size'  - 20
#   'species_set' - an empty array ref
sub param_defaults {
    my %defaults = (
        'group_size'  => 20,
        'species_set' => [],
    );

    return \%defaults;
}
00051 
00052 
# Collects the member_ids that write_output will later split into jobs.
#
# Parameters read (via $self->param):
#   'genome_db_id' - a single genome_db_id; takes precedence when true.
#   'gdb'          - fallback for 'genome_db_id'; its value is also stored
#                    back under 'genome_db_id' (this relies on the two-argument
#                    form of param() returning the value it stores; param() is
#                    not defined in this file, so confirm against its provider).
#   'species_set'  - array ref of genome_db_ids, used when no single id is given.
#
# Parameters written:
#   'inputlist'    - array ref with the member_id of every 'ENSEMBLPEP' member
#                    fetched for each genome_db_id, in species_set order.
#
# Dies when neither a genome_db_id nor a non-empty 'species_set' array ref is
# available.
sub fetch_input {
    my $self = shift @_;

    my $genome_db_id = $self->param('genome_db_id') || $self->param('genome_db_id', $self->param('gdb'));
    my $species_set  = $genome_db_id ? [ $genome_db_id ] : $self->param('species_set');

    # param_defaults() supplies an empty array ref for 'species_set', which is
    # always true, so a plain truth test can never catch a missing
    # configuration. Check for a non-empty list explicitly instead.
    die "Either 'species_set' list or 'genome_db_id' parameter has to be defined"
        unless ref($species_set) eq 'ARRAY' and @$species_set;

    my $member_adaptor = $self->compara_dba->get_MemberAdaptor;

    my @member_ids = ();
    foreach my $gdb_id (@$species_set) {
        my $members = $member_adaptor->fetch_all_by_source_genome_db_id('ENSEMBLPEP', $gdb_id);

        # Treat an undefined result the same as "no members for this genome".
        push @member_ids, map { $_->member_id } @{ $members || [] };
    }

    $self->param('inputlist', \@member_ids);
}
00072 
00073 
# Dataflows the collected member_ids in fixed-size groups.
#
# Parameters read (via $self->param):
#   'inputlist'  - array ref of member_ids; it is consumed (emptied) here.
#   'group_size' - maximum number of ids per job; must be at least 1.
#
# For each consecutive group of at most 'group_size' ids, one output id of the
# form { 'ids' => [...] } is passed to dataflow_output_id() on branch 2.
#
# Dies when 'group_size' is undefined or smaller than 1.
sub write_output {
    my $self = shift @_;

    my $inputlist  = $self->param('inputlist');
    my $group_size = $self->param('group_size');

    # splice() with a zero length (or a negative one at the tail of the list)
    # removes nothing, so the loop below would never terminate.
    die "'group_size' parameter has to be a positive number"
        unless defined $group_size and int($group_size) >= 1;

    while (@$inputlist) {
        # @job_array is a fresh lexical on every iteration, so handing out a
        # reference to it is safe and avoids a second copy of the group.
        my @job_array = splice(@$inputlist, 0, $group_size);
        $self->dataflow_output_id( { 'ids' => \@job_array }, 2);
    }
}
00085 
00086 
00087 1;