Archive Ensembl Home
ncRNAtrees_conf.pm
Go to the documentation of this file.
00001 
00002 =pod 
00003 
00004 =head1 NAME
00005 
00006   Bio::EnsEMBL::Compara::PipeConfig::ncRNAtrees_conf
00007 
00008 =head1 SYNOPSIS
00009 
00010     init_pipeline.pl Bio::EnsEMBL::Compara::PipeConfig::ncRNAtrees_conf -password <your_password>
00011 
00012 =head1 DESCRIPTION  
00013 
00014     This is an experimental PipeConfig file for the ncRNAtrees pipeline (work in progress).
00015 
00016 =head1 CONTACT
00017 
00018   Please contact the ehive-users@ebi.ac.uk mailing list with questions/suggestions.
00019 
00020 =cut
00021 
00022 package Bio::EnsEMBL::Compara::PipeConfig::ncRNAtrees_conf ;
00023 
00024 use strict;
00025 use warnings;
00026 use base ('Bio::EnsEMBL::Compara::PipeConfig::ComparaGeneric_conf');
00027 
# Returns a hashref of default option values for this pipeline: the options
# inherited from the parent PipeConfig, overlaid with release naming, working
# directory, notification e-mail, database connection hashes, external
# executable locations and CAFE settings.
#
# Values wrapped in $self->o(...) refer to other options (some of which, such
# as 'password', have no default here).
sub default_options {
    my ($self) = @_;
    return {
        %{$self->SUPER::default_options},

        'mlss_id'           => 40080,
        'max_gene_count'    => 1500,

        'release'           => '66',
        'rel_suffix'        => '',    # an empty string by default, a letter otherwise
        'rel_with_suffix'   => $self->o('release').$self->o('rel_suffix'),

        'ensembl_cvs_root_dir' => $ENV{'ENSEMBL_CVS_ROOT_DIR'},

        # 'work_dir' used to be listed twice in this hash; Perl keeps only the last
        # value for a repeated key, so the $HOME-based path below has always been
        # the effective one. The alternative is kept here as a comment:
        #   'work_dir' => '/lustre/scratch101/ensembl/'.$self->o('ENV', 'USER').'/nc_trees_'.$self->o('rel_with_suffix'),
        'work_dir'             => $ENV{'HOME'}.'/ncrna_trees_'.$self->o('rel_with_suffix'),

        'email'             => $ENV{'USER'}.'@ebi.ac.uk',    # NB: your EBI address may differ from the Sanger one!

        'species_tree_input_file'   => '',  # empty value means 'create using genome_db+ncbi_taxonomy information'; can be overridden by a file with a tree in it

        'pipeline_db' => {                                  # connection parameters
            -driver => 'mysql',
            -host   => 'compara4',
            -port   => 3306,
            -user   => 'ensadmin',
            -pass   => $self->o('password'),
            -dbname => $ENV{'USER'}.'_compara_nctrees_'.$self->o('rel_with_suffix'),
        },

        # executable locations:
        'cmalign_exe'      => '/software/ensembl/compara/infernal/infernal-1.0.2/src/cmalign',
        'cmbuild_exe'      => '/software/ensembl/compara/infernal/infernal-1.0.2/src/cmbuild',
        'cmsearch_exe'     => '/software/ensembl/compara/infernal/infernal-1.0.2/src/cmsearch',
        'mafft_exe'        => '/software/ensembl/compara/mafft-6.707/bin/mafft',
        'mafft_binaries'   => '/software/ensembl/compara/mafft-6.707/binaries',
        'raxml_exe'        => '/software/ensembl/compara/raxml/RAxML-7.2.8-ALPHA/raxmlHPC-SSE3',
        'prank_exe'        => '/software/ensembl/compara/prank/090707/src/prank',
        'raxmlLight_exe'   => '/software/ensembl/compara/raxml/RAxML-Light-1.0.5/raxmlLight',
        'parsimonator_exe' => '/software/ensembl/compara/parsimonator/Parsimonator-1.0.2/parsimonator-SSE3',
        'ktreedist_exe'    => '/software/ensembl/compara/ktreedist/Ktreedist.pl',
        'fasttree_exe'     => '/software/ensembl/compara/fasttree/FastTree',
        'treebest_exe'     => '/software/ensembl/compara/treebest.doubletracking',
        'cafe_shell'       => '/software/ensembl/compara/cafe/cafe.2.2/cafe/bin/shell',

        # Data needed for CAFE
        'species_tree_meta_key' => 'full_species_tree_string',
        'cafe_species'          => ['danio.rerio', 'taeniopygia.guttata', 'callithrix.jacchus', 'pan.troglodytes', 'homo.sapiens', 'mus.musculus'],
        'cafe_lambdas'          => '',  # in ncRNAs the lambda is calculated
        'cafe_struct_tree_str'  => '',  # Not set by default

        # first server handed to load_genomedb as a registry source
        'reg1' => {
            -host   => 'ens-staging',
#           -host => 'ens-livemirror',
            -port   => 3306,
            -user   => 'ensro',
            -pass   => '',
        },

        # second server handed to load_genomedb as a registry source
        'reg2' => {
            -host   => 'ens-staging2',
#           -host => 'ens-livemirror',
            -port   => 3306,
            -user   => 'ensro',
            -pass   => '',
        },

        # source of the tables copied by copy_table_factory and of the genome_db ids
        'master_db' => {
            -host   => 'compara1',
            -port   => 3306,
            -user   => 'ensro',
            -pass   => '',
            -dbname => 'sf5_ensembl_compara_master', # 'sf5_ensembl_compara_master',
        },

        'epo_db' => {   # ideally, the current release database with epo pipeline results already loaded
            -host   => 'compara1',
            -port   => 3306,
            -user   => 'ensro',
            -pass   => '',
            -dbname => 'mp12_ensembl_compara_65',
        },
    };
}
00112 
00113 
# Returns the hashref of parameters shared by every analysis of the pipeline.
# Per the original note, these values can be overridden by an analysis'
# parameters{} or by a job's input_id{}.
sub pipeline_wide_parameters {
    my ($self) = @_;

    my %shared;

    # name the pipeline to differentiate the submitted processes
    $shared{'pipeline_name'} = 'NCT_' . $self->o('rel_with_suffix');

    # for automatic notifications (may be unsupported by your Meadows)
    $shared{'email'} = $self->o('email');

    # data directories and filenames
    $shared{'work_dir'} = $self->o('work_dir');

    return \%shared;
}
00122 
# Returns the arrayref of shell commands run when the pipeline is created:
# everything the parent class schedules (creation of database, hive tables and
# compara tables), followed by creation of the working directory.
sub pipeline_create_commands {
    my ($self) = @_;

    my @commands = @{ $self->SUPER::pipeline_create_commands };
    push @commands, 'mkdir -p ' . $self->o('work_dir');

    return \@commands;
}
00130 
# Returns the hashref of resource classes, keyed by the numeric id that the
# analyses refer to through -rc_id. Each entry holds a description and the
# option string to pass to LSF.
sub resource_classes {
    my ($self) = @_;

    my %classes;
    $classes{0} = { -desc => 'default', 'LSF' => '' };
    $classes{1} = { -desc => 'himem',   'LSF' => '-q hugemem -M15000000 -R"select[mem>15000] rusage[mem=15000]"' };
    $classes{2} = { -desc => 'long',    'LSF' => '-q long' };
    $classes{3} = { -desc => 'CAFE',    'LSF' => '-S 1024 -q long' };

    return \%classes;
}
00140 
# Returns the arrayref of analysis definitions that make up the pipeline.
#
# Each element is a hash describing one analysis: its -logic_name, the
# runnable -module, optional -parameters, and how it is wired to the other
# analyses through -flow_into (keyed by branch number) and -wait_for.
# The analyses are grouped, in order:
#   * copying tables from the master database and fixing auto-increment offsets
#   * converting MyISAM tables to InnoDB
#   * loading genome_db entries, the species tree and the low-coverage species set
#   * loading ncRNA/gene members and RFAM models, then RFAM classification
#   * CAFE analysis
#   * the by-cluster branches (alignment, tree building, merging, orthology)
sub pipeline_analyses {
    my ($self) = @_;
    return [

# ---------------------------------------------[copy tables from master and fix the offsets]---------------------------------------------

        {   -logic_name => 'copy_table_factory',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::JobFactory',
            -parameters => {
                'db_conn'   => $self->o('master_db'),
                'inputlist' => [ 'method_link', 'species_set', 'method_link_species_set', 'ncbi_taxa_name', 'ncbi_taxa_node' ],
                'column_names' => [ 'table' ],
                'input_id'  => { 'src_db_conn' => '#db_conn#', 'table' => '#table#' },
                'fan_branch_code' => 2,
            },
            -input_ids => [
                {},
            ],
            -flow_into => {
                2 => [ 'copy_table'  ],
                1 => [ 'offset_tables' ],  # backbone
            },
        },

        {   -logic_name    => 'copy_table',
            -module        => 'Bio::EnsEMBL::Hive::RunnableDB::MySQLTransfer',
            -parameters    => {
                'mode'          => 'overwrite',
                'filter_cmd'    => 'sed "s/ENGINE=MyISAM/ENGINE=InnoDB/"',
            },
            -hive_capacity => 10,
            -can_be_empty => 1,
        },

        {   -logic_name => 'offset_tables',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::SqlCmd',
            -parameters => {
                'sql'   => [
                    'ALTER TABLE member   AUTO_INCREMENT=200000001',
                    'ALTER TABLE sequence AUTO_INCREMENT=200000001',
                    'ALTER TABLE homology AUTO_INCREMENT=100000001',
                    'ALTER TABLE gene_tree_node AUTO_INCREMENT=100000001',
                    'ALTER TABLE CAFE_tree_node AUTO_INCREMENT=100000001',
                ],
            },
            -wait_for => [ 'copy_table' ],    # have to wait until the tables have been copied
            -flow_into => {
                1 => [ 'innodbise_table_factory' ],
            },
        },

# ---------------------------------------------[turn all MyISAM tables except 'meta' to InnoDB]---------------------------------------------

        {   -logic_name => 'innodbise_table_factory',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::JobFactory',
            -parameters => {
                'inputquery'      => "SELECT table_name FROM information_schema.tables WHERE table_schema ='".$self->o('pipeline_db','-dbname')."' AND table_name!='meta' AND engine='MyISAM' ",
                'fan_branch_code' => 2,
            },
            -flow_into => {
                2 => [ 'innodbise_table'  ],
                1 => [ 'load_genomedb_factory' ],
            },
        },

        {   -logic_name    => 'innodbise_table',
            -module        => 'Bio::EnsEMBL::Hive::RunnableDB::SqlCmd',
            -parameters    => {
                'sql'         => "ALTER TABLE #table_name# ENGINE=InnoDB",
            },
            -hive_capacity => 10,
            -can_be_empty => 1,
        },

# ---------------------------------------------[load GenomeDB entries from master+cores]---------------------------------------------

        {   -logic_name => 'load_genomedb_factory',
            -module     => 'Bio::EnsEMBL::Compara::RunnableDB::ObjectFactory',
            -parameters => {
                'compara_db'            => $self->o('master_db'),   # that's where genome_db_ids come from
                'mlss_id'               => $self->o('mlss_id'),

                'adaptor_name'          => 'MethodLinkSpeciesSetAdaptor',
                'adaptor_method'        => 'fetch_by_dbID',
                'method_param_list'     => [ '#mlss_id#' ],
                'object_method'         => 'species_set',

                'column_names2getters'  => { 'genome_db_id' => 'dbID', 'species_name' => 'name', 'assembly_name' => 'assembly', 'genebuild' => 'genebuild', 'locator' => 'locator' },

                'fan_branch_code'       => 2,
            },
            -wait_for  => [ 'innodbise_table' ], # explicit wait is needed because the innodbise_table fan can be empty
            -flow_into => {
                2 => [ 'load_genomedb' ],           # fan
                1 => [ 'load_genomedb_funnel', 'load_rfam_models' ],    # backbone
            },
        },

        {   -logic_name => 'load_genomedb',
            -module     => 'Bio::EnsEMBL::Compara::RunnableDB::LoadOneGenomeDB',
            -parameters => {
                'registry_dbs'  => [ $self->o('reg1'), $self->o('reg2'), ],
            },
            -hive_capacity => 1,    # they are all short jobs, no point doing them in parallel
            -flow_into => {
                1 => [ 'load_members_factory' ],   # each will flow into another one
            },
        },

        {   -logic_name => 'load_genomedb_funnel',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::Dummy',
            -wait_for => [ 'load_genomedb' ],
            -flow_into => {
                1 => [ 'make_species_tree', 'create_lca_species_set' ],
            },
        },

# ---------------------------------------------[load species tree]-------------------------------------------------------------------

        {   -logic_name    => 'make_species_tree',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::MakeSpeciesTree',
            -parameters    => {
                'species_tree_input_file' => $self->o('species_tree_input_file'),   # empty by default, but if nonempty this file will be used instead of tree generation from genome_db
                'multifurcation_deletes_node'           => [ 33316, 129949, 314146 ],
                'multifurcation_deletes_all_subnodes'   => [  9347, 186625,  32561 ],
            },
            -hive_capacity => -1,   # to allow for parallelization
            -flow_into  => {
                3 => { 'mysql:////meta' => { 'meta_key' => 'species_tree_string', 'meta_value' => '#species_tree_string#' } },
            },
        },

# ---------------------------------------------[create the low-coverage-assembly species set]-----------------------------------------

        {   -logic_name => 'create_lca_species_set',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::SqlCmd',
            -parameters => {
                'sql' => [  "INSERT INTO species_set (genome_db_id) SELECT genome_db_id FROM genome_db LIMIT 1",   # insert a dummy pair (auto_increment++, <anything>) into the table
                            "DELETE FROM species_set WHERE species_set_id IN (#_insert_id_0#)",     # delete the previously inserted row, but keep the auto_increment
                ],
            },
            -hive_capacity => -1,   # to allow for parallelization
            -flow_into => {
                2 => {
                    'generate_pre_species_set'     => { 'lca_species_set_id' => '#_insert_id_0#' },     # pass it on to the query
                    'mysql:////species_set_tag' => { 'species_set_id' => '#_insert_id_0#', 'tag' => 'name', 'value' => 'low-coverage-assembly' },   # record the id in ss_tag table
                },
            },
        },

        {   -logic_name => 'generate_pre_species_set',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::JobFactory',    # another non-standard use of JobFactory for iterative insertion
            -parameters => {
                'db_conn'         => $self->o('epo_db'),
                'inputquery'      => "SELECT #lca_species_set_id# as lca_species_set_id, GROUP_CONCAT(DISTINCT g.genome_db_id) as pre_species_set FROM genome_db g JOIN species_set ss USING(genome_db_id) JOIN method_link_species_set mlss USING(species_set_id) WHERE assembly_default AND mlss.name LIKE '%EPO_LOW_COVERAGE%' AND g.genome_db_id NOT IN (SELECT DISTINCT(g2.genome_db_id) FROM genome_db g2 JOIN species_set ss2 USING(genome_db_id) JOIN method_link_species_set mlss2 USING(species_set_id) WHERE assembly_default AND mlss2.name LIKE '%EPO')",
                'fan_branch_code' => 3,
            },
            -hive_capacity => -1,   # to allow for parallelization
            -flow_into => {
                3 => [ 'store_lca_species_set' ],
            },
        },

        {   -logic_name => 'store_lca_species_set',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::JobFactory',    # another non-standard use of JobFactory for iterative insertion
            -parameters => {
                'inputquery'      => "SELECT #lca_species_set_id# as species_set_id, genome_db_id FROM genome_db where genome_db_id in (#pre_species_set#)",
                'fan_branch_code' => 3,
            },
            -hive_capacity => -1,   # to allow for parallelization
            -flow_into => {
                3 => [ 'mysql:////species_set' ],
            },
        },

# ---------------------------------------------[load ncRNA and gene members and subsets]---------------------------------------------

        {   -logic_name    => 'load_members_factory',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::GenomePrepareNCMembers',
            -hive_capacity => 10,
            -flow_into => {
                2 => [ 'load_members' ],   # per-genome fan
            },
        },

        {   -logic_name    => 'load_members',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::GeneStoreNCMembers',
            -hive_capacity => 30,
            -flow_into => {
                3 => [ 'mysql:////subset_member' ],   # every ncrna member is added to the corresponding subset
                4 => [ 'mysql:////subset_member' ],   # every gene  member is added to the corresponding subset
            },
        },

# ---------------------------------------------[load RFAM models]---------------------------------------------------------------------

        {   -logic_name    => 'load_rfam_models',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::RFAMLoadModels',
            -hive_capacity => -1,   # to allow for parallelization
            -flow_into => {
                1 => [ 'rfam_classify' ],
            },
        },

# ---------------------------------------------[run RFAM classification]--------------------------------------------------------------

        {   -logic_name    => 'rfam_classify',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::RFAMClassify',
            -parameters    => {
                'mlss_id'        => $self->o('mlss_id'),
            },
            -wait_for => [ 'make_species_tree', 'store_lca_species_set', 'load_members_factory', 'load_members' ], # mega-funnel
            -flow_into => {
                2 => [ 'recover_epo', 'treebest_mmerge' ],
                1 => ['db_snapshot_after_Rfam_classify', 'make_full_species_tree'],
            },
        },

# -------------------------------------------------------------------------------------------------------------------

        {   -logic_name => 'db_snapshot_after_Rfam_classify',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
            -parameters => {
                # dumps the whole pipeline database into a file in the user's home directory
                'cmd'      => 'mysqldump '.$self->dbconn_2_mysql('pipeline_db', 0).' '.$self->o('pipeline_db','-dbname').' >#filename#',
                'filename'  => $ENV{'HOME'}.'/db_snapshot_after_Rfam_classify',
            },
        },

#--------------------------------------------------------------------------------

        {   -logic_name => 'make_full_species_tree',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::MakeSpeciesTree',
            -parameters => {
                'species_tree_input_file' => $self->o('species_tree_input_file'),   # empty by default, but if nonempty this file will be used instead of tree generation from genome_db
                'species_tree_string' => '',
            },
            -hive_capacity => -1,   # to allow for parallelization
            -wait_for => ['db_snapshot_after_Rfam_classify'],
            -flow_into  => {
                3 => { 'mysql:////meta' => { 'meta_key' => $self->o('species_tree_meta_key'), 'meta_value' => '#species_tree_string#' } },
                1 => ['CAFE_species_tree'],
            },
        },

        {   -logic_name => 'CAFE_species_tree',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::CAFESpeciesTree',
            -parameters => {
                'cafe_species' => $self->o('cafe_species'),
                'species_tree_meta_key' => $self->o('species_tree_meta_key'),
            },
            -hive_capacity => -1, # to allow for parallelization
            -flow_into => {
                1 => ['CAFE_table'],
            },
        },

        {   -logic_name => 'CAFE_table',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::CAFETable',
            -parameters => {
                'work_dir'     => $self->o('work_dir'),
                'cafe_species' => $self->o('cafe_species'),
                'mlss_id'      => $self->o('mlss_id'),
                'type'         => 'nc',
            },
            -hive_capacity => -1,
            -flow_into => {
                1 => ['CAFE_analysis'],
            },
        },

        {   -logic_name => 'CAFE_analysis',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::CAFEAnalysis',
            -parameters => {
                'work_dir'             => $self->o('work_dir'),
                'cafe_lambdas'         => $self->o('cafe_lambdas'),
#                'cafe_struct_taxons'   => $self->o('cafe_'),
                'cafe_struct_tree_str' => $self->o('cafe_struct_tree_str'),
                'mlss_id'              => $self->o('mlss_id'),
                'cafe_shell'           => $self->o('cafe_shell'),
            },
            -rc_id => 3,
            -hive_capacity => -1,
            -flow_into => {
                # FIXME: the three entries below share the key 'mysql:////meta', and a
                # Perl hash literal keeps only the last value given for a repeated key.
                # As written, only the 'CAFE_tree_string' template takes effect; the
                # 'cafe_lambda' and 'cafe_table_file' templates are discarded.
                # Left unchanged because the right fix depends on how the installed
                # eHive accepts several templates for one target -- TODO confirm.
                3 => {
                    'mysql:////meta' => { 'meta_key' => 'cafe_lambda', 'meta_value' => '#cafe_lambda#' },
                    'mysql:////meta' => { 'meta_key' => 'cafe_table_file', 'meta_value' => '#cafe_table_file#' },
                    'mysql:////meta' => { 'meta_key' => 'CAFE_tree_string', 'meta_value' => '#cafe_tree_string#' },
                },
            },
        },

#----------------------------------------[by-cluster branches]----------------------------------------

        {   -logic_name    => 'recover_epo',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCRecoverEPO',
            -parameters    => {
                'mlss_id'        => $self->o('mlss_id'),
                'epo_db'         => $self->o('epo_db'),
            },
            -hive_capacity => 100,
            -wait_for => ['db_snapshot_after_Rfam_classify'],
            -flow_into => {
                1 => [ 'genomic_alignment', 'infernal' ],
            },
        },

#         {   -logic_name    => 'recover_search',
#             -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCRecoverSearch',
#             -batch_size    => 5,
#             -hive_capacity => -1,
#             -flow_into => {
#                 1 => [ 'infernal' ],
#             },
#         },

        {   -logic_name    => 'infernal',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::Infernal',
            -hive_capacity => 200,
            -failed_job_tolerance => 10,    # that many per cent jobs are allowed to fail
            -parameters => {
                'cmbuild_exe' => $self->o('cmbuild_exe'),
                'cmalign_exe' => $self->o('cmalign_exe'),
            },
            -flow_into => {
                1 => ['pre_sec_struct_tree' ],
            },
        },

        {   -logic_name    => 'pre_sec_struct_tree', ## pre_sec_struct_tree
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::PrepareSecStructModels',  ## PrepareRAxMLSecModels -- rename
            -hive_capacity => 200,
            -parameters => {
                'raxml_exe' => $self->o('raxml_exe'),
            },
            -flow_into => {
                2 => [ 'sec_struct_model_tree'],
#                -1 => [ 'pre_sec_struct_tree_himem' ],
#                -2 => [ 'sec_struct_model_tree_himem' ],
                -1 => ['fast_trees'],  # -1 is MEMLIMIT
                -2 => ['fast_trees'],  # -2 is TIMELIMIT
            },
        },

#         {
#          -logic_name => 'pre_sec_struct_tree_himem',
#          -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::PrepareSecStructModels',
#          -hive_capacity => 200,
#          -parameters => {
#                          'raxml_exe' => $self->o('raxml_exe'),
#                         },
#          -flow_into => {
#                         1 => [ 'treebest_mmerge' ],
#                         2 => [ 'sec_struct_model_tree_himem' ],
#                        },
#          -can_be_empty => 1,
#          -rc_id => 1,
#         },

        {   -logic_name    => 'sec_struct_model_tree', ## sec_struct_model_tree
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::SecStructModelTree', ## SecStrucModels
            -hive_capacity => 200,
            -parameters => {
                'raxml_exe' => $self->o('raxml_exe'),
            },
            -failed_job_tolerance => 3,
            -flow_into => {
                -1 => [ 'sec_struct_model_tree_himem' ],
                -2 => [ 'sec_struct_model_tree_himem' ],
            },
        },

        {   -logic_name => 'sec_struct_model_tree_himem',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::SecStructModelTree',
            -hive_capacity => 200,
            -parameters => {
                # same module as 'sec_struct_model_tree', so the executable is passed
                # under the same parameter name (this used to be spelled 'raxml')
                'raxml_exe' => $self->o('raxml_exe'),
            },
            -failed_job_tolerance => 3,
            -can_be_empty => 1,
            -rc_id => 1,
        },

        {   -logic_name    => 'genomic_alignment',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCGenomicAlignment',
            -hive_capacity => 200,
            -parameters => {
                'mafft_exe' => $self->o('mafft_exe'),
                'mafft_binaries' => $self->o('mafft_binaries'),
                'raxml_exe' => $self->o('raxml_exe'),
                'prank_exe' => $self->o('prank_exe'),
            },
            -failed_job_tolerance => 5,    # that many per cent jobs are allowed to fail
            -flow_into => {
                -2 => ['genomic_alignment_long'],
                -1 => ['genomic_alignment_long'],
                3  => ['fast_trees'],
                2  => ['genomic_tree'],
            },
        },

        {   -logic_name => 'fast_trees',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCFastTrees',
            -hive_capacity => 200,
            -parameters => {
                'fasttree_exe' => $self->o('fasttree_exe'),
                'parsimonator_exe' => $self->o('parsimonator_exe'),
                'raxmlLight_exe' => $self->o('raxmlLight_exe'),
            },
            -can_be_empty => 1,
            -rc_id => 1,
        },

        {   -logic_name => 'genomic_alignment_long',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCGenomicAlignment',
            -hive_capacity => 200,
            -parameters => {
                'mafft_exe' => $self->o('mafft_exe'),
                'mafft_binaries' => $self->o('mafft_binaries'),
                'raxml_exe' => $self->o('raxml_exe'),
                'prank_exe' => $self->o('prank_exe'),
            },
            -failed_job_tolerance => 5,
            -can_be_empty => 1,
            -rc_id => 1,
            -flow_into => {
                2 => ['genomic_tree_himem'],
            },
        },

        {   -logic_name => 'genomic_tree',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCGenomicTree',
            -hive_capacity => 200,
            -parameters => {
                'treebest_exe' => $self->o('treebest_exe'),
            },
            -flow_into => {
                -2 => ['genomic_tree_himem'],
                -1 => ['genomic_tree_himem'],
            },
        },

        {   -logic_name => 'genomic_tree_himem',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCGenomicTree',
            -hive_capacity => 200,
            -parameters => {
                'treebest_exe' => $self->o('treebest_exe'),
            },
            -can_be_empty => 1,
            -rc_id => 1,
        },

        {   -logic_name    => 'treebest_mmerge',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCTreeBestMMerge',
            -hive_capacity => 400,
            -parameters => {
                'treebest_exe' => $self->o('treebest_exe'),
            },
            # funnel: waits for every by-cluster alignment and tree-building analysis
            -wait_for      => ['recover_epo', 'pre_sec_struct_tree','genomic_alignment', 'genomic_alignment_long', 'sec_struct_model_tree','sec_struct_model_tree_himem', 'genomic_tree', 'genomic_tree_himem', 'fast_trees', 'infernal' ],
            -failed_job_tolerance => 5,
            -flow_into => {
                1 => [ 'orthotree', 'ktreedist' ],
                -1 => [ 'treebest_mmerge_himem' ],
                -2 => [ 'treebest_mmerge_himem' ],
            },
        },

        {   -logic_name => 'treebest_mmerge_himem',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCTreeBestMMerge',
            -hive_capacity => 400,
            -parameters => {
                'treebest_exe' => $self->o('treebest_exe'),
            },
            -failed_job_tolerance => 5,
            -flow_into => {
                1 => [ 'orthotree', 'ktreedist' ],
            },
            -rc_id => 1,
        },

        {   -logic_name    => 'orthotree',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCOrthoTree',
            -hive_capacity => 200,
            -flow_into => {
                -1 => ['orthotree_himem' ],
                -2 => ['orthotree_himem' ],
            },
        },

        {   -logic_name => 'orthotree_himem',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCOrthoTree',
            -hive_capacity => 200,
            -failed_job_tolerance => 5,
            -rc_id => 1,
        },

        {   -logic_name    => 'ktreedist',
            -module        => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::Ktreedist',
            -hive_capacity => -1,
            -parameters => {
                'treebest_exe'  => $self->o('treebest_exe'),
                'ktreedist_exe' => $self->o('ktreedist_exe'),
            },
            -failed_job_tolerance =>  5,    # that many per cent jobs are allowed to fail
            -flow_into => {
                -1 => [ 'ktreedist_himem' ],
            },
        },

        {   -logic_name => 'ktreedist_himem',
            -module => 'Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::Ktreedist',
            -hive_capacity => -1,
            -parameters => {
                'treebest_exe'  => $self->o('treebest_exe'),
                'ktreedist_exe' => $self->o('ktreedist_exe'),
            },
            -failed_job_tolerance => 5,
            -rc_id => 1,
        },

    ];
}
00677 
00678 1;
00679