Archive Ensembl Home
DumpSubsetCreateBlastDB.pm
Go to the documentation of this file.
00001 =pod
00002 
00003 =head1 NAME
00004 
00005 Bio::EnsEMBL::Compara::RunnableDB::ProteinTrees::DumpSubsetCreateBlastDB
00006 
00007 =head1 DESCRIPTION
00008 
00009 This is a Compara-specific module that takes in a Subset of members (defined by subset_id),
00010 dumps the sequences into a file in Fasta format and creates a Blastp database from this file.
00011 It is used by the GeneTrees pipeline.
00012 
00013 =cut
00014 
00015 
00016 package Bio::EnsEMBL::Compara::RunnableDB::ProteinTrees::DumpSubsetCreateBlastDB;
00017 
00018 use strict;
00019 
00020 use Bio::EnsEMBL::Analysis::Tools::BlastDB;
00021 
00022 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00023 
00024 
# fetch_input
#
# Looks up the GenomeDB and the Subset holding its translations, derives the
# path of the Fasta file and dumps the Subset's sequences into that file.
#
# Parameters read:
#   genome_db_id - obligatory; the legacy 'gdb' parameter is used as a fallback
#   fasta_dir    - obligatory; directory the Fasta file is written into
# Parameters set:
#   genome_db_id - only when the value had to be taken from 'gdb'
#   fasta_file   - path of the Fasta file that has been dumped
#
# Dies if an obligatory parameter is missing, or if the GenomeDB or the Subset
# cannot be fetched.
sub fetch_input {
    my $self = shift @_;

    my $genome_db_id = $self->param('genome_db_id') || $self->param('genome_db_id', $self->param('gdb'))        # for compatibility
        or die "'genome_db_id' is an obligatory parameter";

        # Validate 'fasta_dir' up front: without it the path below would silently
        # start with '/' and the dump would target the filesystem root.
    my $fasta_dir = $self->param('fasta_dir');
    die "'fasta_dir' is an obligatory parameter" unless (defined($fasta_dir) && length($fasta_dir));

    my $genome_db = $self->compara_dba->get_GenomeDBAdaptor->fetch_by_dbID($genome_db_id) or die "cannot fetch GenomeDB with id '$genome_db_id'";

        # The same adaptor is used for both the lookup and the dump, so fetch it once.
    my $subset_adaptor = $self->compara_dba->get_SubsetAdaptor();

        # '%' is presumably a wildcard understood by fetch_by_description_pattern -- TODO confirm
    my $description_pattern = "gdb:$genome_db_id \% translations";
    my $subset    = $subset_adaptor->fetch_by_description_pattern($description_pattern) or die "cannot fetch Subset with pattern '$description_pattern'";

    my $fasta_file = $fasta_dir . '/' . $genome_db->name() . '_' . $genome_db->assembly() . '.fasta';
    $fasta_file =~ s/\s+/_/g;    # replace whitespace with '_' characters
    $fasta_file =~ s{/+}{/}g;    # collapse every run of slashes (not just pairs) into a single /
    $self->param('fasta_file', $fasta_file);

        # write fasta file:
    $subset_adaptor->dumpFastaForSubset($subset, $fasta_file);
}
00044 
00045 
# run
#
# Builds a protein Blast database out of the Fasta file whose path
# fetch_input() stored in the 'fasta_file' parameter.
#
# Returns whatever create_blastdb() returns.
sub run {
    my ($self) = @_;

    my $sequence_path = $self->param('fasta_file');

        # Describe the Fasta file as a protein sequence source ...
    my %blastdb_args = (
        -sequence_file => $sequence_path,
        -mol_type      => 'PROTEIN',
    );
    my $protein_db = Bio::EnsEMBL::Analysis::Tools::BlastDB->new(%blastdb_args);

        # ... and have it indexed so that it can serve as a Blast database.
    return $protein_db->create_blastdb();
}
00058 
00059 1;
00060