Archive Ensembl HomeArchive Ensembl Home
SearchHMM.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::SearchHMM
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $db           = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
00017 my $search_hmm = Bio::EnsEMBL::Compara::RunnableDB::SearchHMM->new
00018   (
00019    -db         => $db,
00020    -input_id   => $input_id,
00021    -analysis   => $analysis
00022   );
00023 $search_hmm->fetch_input(); #reads from DB
00024 $search_hmm->run();
00025 $search_hmm->output();
00026 $search_hmm->write_output(); #writes to DB
00027 
00028 =cut
00029 
00030 
00031 =head1 DESCRIPTION
00032 
00033 This Analysis will take the sequences from a cluster, the cm from
00034 nc_profile and run a profiled alignment, storing the results as
00035 cigar_lines for each sequence.
00036 
00037 =cut
00038 
00039 
00040 =head1 CONTACT
00041 
00042   Contact Albert Vilella on module implementation/design detail: avilella@ebi.ac.uk
00043   Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
00044 
00045 =cut
00046 
00047 
00048 =head1 APPENDIX
00049 
00050 The rest of the documentation details each of the object methods. 
00051 Internal methods are usually preceded with a _
00052 
00053 =cut
00054 
00055 
00056 package Bio::EnsEMBL::Compara::RunnableDB::SearchHMM;
00057 
00058 use strict;
00059 use Getopt::Long;
00060 use Time::HiRes qw(time gettimeofday tv_interval);
00061 
00062 use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
00063 use Bio::EnsEMBL::Hive;
00064 our @ISA = qw(Bio::EnsEMBL::Hive::Process);
00065 
00066 
00067 =head2 fetch_input
00068 
00069     Title   :   fetch_input
00070     Usage   :   $self->fetch_input
00071     Function:   Fetches input data from the database
00072     Returns :   none
00073     Args    :   none
00074 
00075 =cut
00076 
00077 
00078 sub fetch_input {
00079   my( $self) = @_;
00080 
00081   $self->{'clusterset_id'} = 1;
00082   $self->{max_evalue} = 0.05;
00083 
00084   #create a Compara::DBAdaptor which shares the same DBI handle
00085   #with the Pipeline::DBAdaptor that is based into this runnable
00086   $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new
00087     (
00088      -DBCONN=>$self->db->dbc
00089     );
00090 
00091   # Get the needed adaptors here
00092   $self->{treeDBA} = $self->{'comparaDBA'}->get_ProteinTreeAdaptor;
00093 
00094   $self->get_params($self->parameters);
00095   $self->get_params($self->input_id);
00096 
00097 # # For long parameters, look at analysis_data
00098 #   if($self->{analysis_data_id}) {
00099 #     my $analysis_data_id = $self->{analysis_data_id};
00100 #     my $analysis_data_params = $self->db->get_AnalysisDataAdaptor->fetch_by_dbID($analysis_data_id);
00101 #     $self->get_params($analysis_data_params);
00102 #   }
00103 
00104   if(defined($self->{protein_tree_id})) {
00105     $self->{tree} = 
00106          $self->{treeDBA}->fetch_node_by_node_id($self->{protein_tree_id});
00107     printf("  protein_tree_id : %d\n", $self->{protein_tree_id}) if ($self->debug);
00108   }
00109 
00110   # Fetch hmm_profile
00111   $self->fetch_hmmprofile;
00112 
00113   return 1;
00114 }
00115 
00116 
00117 sub get_params {
00118   my $self         = shift;
00119   my $param_string = shift;
00120 
00121   return unless($param_string);
00122   print("parsing parameter string : ",$param_string,"\n") if($self->debug);
00123 
00124   my $params = eval($param_string);
00125   return unless($params);
00126 
00127   if($self->debug) {
00128     foreach my $key (keys %$params) {
00129       print("  $key : ", $params->{$key}, "\n");
00130     }
00131   }
00132 
00133   foreach my $key (qw[qtaxon_id protein_tree_id type cdna fastafile analysis_data_id]) {
00134     my $value = $params->{$key};
00135     $self->{$key} = $value if defined $value;
00136   }
00137 
00138   return;
00139 }
00140 
00141 
00142 =head2 run
00143 
00144     Title   :   run
00145     Usage   :   $self->run
00146     Function:   runs something
00147     Returns :   none
00148     Args    :   none
00149 
00150 =cut
00151 
00152 sub run {
00153   my $self = shift;
00154 
00155   $self->run_search_hmm;
00156 }
00157 
00158 
00159 =head2 write_output
00160 
00161     Title   :   write_output
00162     Usage   :   $self->write_output
00163     Function:   stores something
00164     Returns :   none
00165     Args    :   none
00166 
00167 =cut
00168 
00169 
00170 sub write_output {
00171   my $self = shift;
00172 
00173   $self->search_hmm_store_hits;
00174 }
00175 
00176 
00177 ##########################################
00178 #
00179 # internal methods
00180 #
00181 ##########################################
00182 
00183 
00184 sub fetch_hmmprofile {
00185   my $self = shift;
00186 
00187   my $hmm_type = $self->{type} || 'aa';
00188   my $node_id = $self->{tree}->node_id;
00189   print STDERR "type = $hmm_type\n" if ($self->debug);
00190 
00191   my $query = "SELECT hmmprofile FROM protein_tree_hmmprofile WHERE type=\"$hmm_type\" AND node_id=$node_id";
00192   print STDERR "$query\n" if ($self->debug);
00193   my $sth = $self->{comparaDBA}->dbc->prepare($query);
00194   $sth->execute;
00195   my $result = $sth->fetchrow_hashref;
00196   $self->{hmmprofile} = $result->{hmmprofile} if (defined($result->{hmmprofile}));
00197   $sth->finish;
00198 
00199   return 1;
00200 }
00201 
00202 sub run_search_hmm {
00203   my $self = shift;
00204 
00205   my $node_id = $self->{tree}->node_id;
00206   my $type = $self->{type};
00207   my $hmmprofile = $self->{hmmprofile};
00208   my $fastafile = $self->{fastafile};
00209 
00210   my $tempfilename = $self->worker_temp_directory . $node_id . "." . $type . ".hmm";
00211   open FILE, ">$tempfilename" or die "$!";
00212   print FILE $hmmprofile;
00213   close FILE;
00214   delete $self->{hmmprofile};
00215 
00216   my $search_hmm_executable = $self->analysis->program_file;
00217   unless (-e $search_hmm_executable) {
00218     $search_hmm_executable = "/nfs/acari/avilella/src/hmmer3/latest/hmmer-3.0b3/src/hmmsearch";
00219   }
00220 
00221   my $fh;
00222   eval { open($fh, "$search_hmm_executable $tempfilename $fastafile |") || die $!; };
00223   if ($@) {
00224     warn("problem with search_hmm $@ $!");
00225     return;
00226   }
00227 
00228   $self->{'comparaDBA'}->dbc->disconnect_when_inactive(1);
00229   my $starttime = time();
00230 
00231   while (<$fh>) {
00232     if (/^Scores for complete sequences/) {
00233       $_ = <$fh>;
00234       <$fh>;
00235       <$fh>; # /------- ------ -----    ------- ------ -----   ---- --  --------       -----------/
00236       while (<$fh>) {
00237         last if (/no hits above thresholds/);
00238         last if (/^\s*$/);
00239         $_ =~ /\s+(\S+)\s+(\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)/;
00240         my $evalue = $1;
00241         my $score = $2;
00242         my $id = $3;
00243         $score =~ /^\s*(\S+)/;
00244         $self->{hits}{$id}{Score} = $1;
00245         $evalue =~ /^\s*(\S+)/;
00246         $self->{hits}{$id}{Evalue} = $1;
00247       }
00248       last;
00249     }
00250   }
00251   close($fh);
00252   $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);
00253   print STDERR scalar (keys %{$self->{hits}}), " hits - ",(time()-$starttime)," secs...\n";
00254 
00255   return 1;
00256 }
00257 
00258 sub search_hmm_store_hits {
00259   my $self = shift;
00260   my $type = $self->{type};
00261   my $node_id = $self->{tree}->node_id;
00262   my $qtaxon_id = $self->{qtaxon_id} || 0;
00263 
00264   my $sth = $self->{comparaDBA}->dbc->prepare
00265     ("INSERT INTO hmmsearch
00266        (stable_id,
00267         node_id,
00268         evalue,
00269         score,
00270         type,
00271         qtaxon_id) VALUES (?,?,?,?,?,?)");
00272 
00273   my $evalue_count = 0;
00274   foreach my $stable_id (keys %{$self->{hits}}) {
00275     my $evalue = $self->{hits}{$stable_id}{Evalue};
00276     my $score = $self->{hits}{$stable_id}{Score};
00277     next unless (defined($stable_id) && $stable_id ne '');
00278     next unless (defined($score));
00279     next unless ($evalue < $self->{max_evalue});
00280     $evalue_count++;
00281     $sth->execute($stable_id,
00282                   $node_id,
00283                   $evalue,
00284                   $score,
00285                   $type,
00286                   $qtaxon_id);
00287   }
00288   $sth->finish();
00289   printf("%10d hits stored\n", $evalue_count) if(($evalue_count % 10 == 0) && 0 < $self->debug);
00290   return 1;
00291 }
00292 
00293 1;