Archive Ensembl HomeArchive Ensembl Home
RFAMLoadModels.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::RFAMLoadModels
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $db           = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
00017 my $rfamloadmodels = Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::RFAMLoadModels->new
00018   (
00019    -db         => $db,
00020    -input_id   => $input_id,
00021    -analysis   => $analysis
00022   );
00023 $rfamloadmodels->fetch_input(); #reads from DB
00024 $rfamloadmodels->run();
00025 $rfamloadmodels->output();
00026 $rfamloadmodels->write_output(); #writes to DB
00027 
00028 =cut
00029 
00030 
00031 =head1 DESCRIPTION
00032 
00033 This Analysis/RunnableDB is designed to fetch the Infernal models from
00034 the RFAM ftp site and load them into the database to be used in the
00035 alignment process.
00036 
00037 =cut
00038 
00039 
00040 =head1 CONTACT
00041 
00042   Contact Albert Vilella on module implementation/design detail: avilella@ebi.ac.uk
00043   Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
00044 
00045 =cut
00046 
00047 
00048 =head1 APPENDIX
00049 
00050 The rest of the documentation details each of the object methods. 
00051 Internal methods are usually preceded with a _
00052 
00053 =cut
00054 
00055 
00056 package Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::RFAMLoadModels;
00057 
00058 use strict;
00059 use IO::File;
00060 use File::Basename;
00061 use Time::HiRes qw(time gettimeofday tv_interval);
00062 use LWP::Simple;
00063 
00064 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00065 
00066 
# Default parameters for this runnable: the profile 'type' recorded alongside
# every stored model is 'infernal' unless overridden.
# Returns a reference to a freshly built hash on every call.
sub param_defaults {
    my %defaults = (
        'type' => 'infernal',
    );

    return \%defaults;
}
00072 
00073 
=head2 fetch_input

    Title   :   fetch_input
    Usage   :   $self->fetch_input
    Function:   Does nothing: this module needs no input from the database,
                the models are downloaded during run()
    Returns :   nothing meaningful
    Args    :   none

=cut

sub fetch_input {
    my $self = shift;

    # Intentionally empty: there is nothing to prepare before run().
}
00088 
00089 
=head2 run

    Title   :   run
    Usage   :   $self->run
    Function:   Downloads and uncompresses the Rfam model file by delegating
                to download_rfam_models
    Returns :   the return value of download_rfam_models (1 on success)
    Args    :   none

=cut


sub run {
    my ($self) = @_;

    return $self->download_rfam_models();
}
00106 
00107 
=head2 write_output

    Title   :   write_output
    Usage   :   $self->write_output
    Function:   Stores the downloaded model profiles by delegating to
                store_hmmprofile
    Returns :   the return value of store_hmmprofile (1 on success)
    Args    :   none

=cut


sub write_output {
    my ($self) = @_;

    return $self->store_hmmprofile();
}
00124 
00125 
00126 ##########################################
00127 #
00128 # internal methods
00129 #
00130 ##########################################
00131 
# download_rfam_models
#
# Downloads the gzipped Rfam covariance-model file into the worker's temporary
# directory, uncompresses it with gunzip and records the path of the expanded
# file in the 'multicm_file' parameter.
#
# Returns : 1 on success
# Throws  : dies if the download does not succeed; $self->throw if gunzip
#           fails or the expanded file is missing afterwards
sub download_rfam_models {
  my $self = shift;

  my $starttime = time();

  my $url  = 'ftp://ftp.sanger.ac.uk/pub/databases/Rfam/10.0/';
  my $file = 'Rfam.cm.gz';
  # Alternative source kept for reference:
  #   ftp://ftp.sanger.ac.uk/pub/databases/Rfam/CURRENT/infernal-latest.tar.gz

  # Do not rely on worker_temp_directory ending in a path separator.
  my $worker_temp_directory = $self->worker_temp_directory;
  $worker_temp_directory .= '/' unless $worker_temp_directory =~ m{/$};

  my $tmp_file = $worker_temp_directory . $file;
  (my $expanded_file = $tmp_file) =~ s/\.gz$//;

  # Retry safe: a leftover from a previous attempt must not get in gunzip's way.
  unlink($expanded_file);

  my $ftp_file = $url . $file;
  my $status = getstore($ftp_file, $tmp_file);
  die "Error $status on $ftp_file" unless is_success($status);

  # List-form system() bypasses the shell, so odd characters in the temporary
  # path cannot be misinterpreted, and no unchecked "cd" is needed because
  # gunzip is given the full path of the downloaded file.
  my @cmd = ('gunzip', $tmp_file);
  my $rc = system(@cmd);
  if ($rc != 0) {
    # $! is only meaningful when the command could not be started at all;
    # otherwise the wait status carries the signal / exit code.
    my $reason = ($rc == -1)  ? "could not be executed: $!"
               : ($rc & 127)  ? 'died with signal ' . ($rc & 127)
               :                'exited with status ' . ($rc >> 8);
    $self->throw("error expanding RFAMLoadModels: '@cmd' $reason\n");
  }
  unless (-e $expanded_file) {
    $self->throw("error expanding RFAMLoadModels: $expanded_file was not created\n");
  }
  printf("time for RFAMLoadModels fetch : %1.3f secs\n" , time()-$starttime);

  $self->param('multicm_file', $expanded_file);

  return 1;
}
00160 
00161 
# store_hmmprofile
#
# Reads the multi-model CM file named by the 'multicm_file' parameter and
# passes each model to load_cmprofile. A model is every line up to and
# including its terminating "//" line; its name and accession are taken from
# the lines whose first whitespace-separated field is NAME or ACCESSION.
#
# Returns : 1 on success
# Throws  : $self->throw if 'multicm_file' is not set or a model has no
#           ACCESSION line; dies if the file cannot be opened
sub store_hmmprofile {
  my $self = shift;

  my $multicm_file = $self->param('multicm_file');
  # Three-argument open() with an undefined name would silently open an
  # anonymous temporary file, so reject that case explicitly.
  $self->throw("'multicm_file' parameter is not set\n") unless defined $multicm_file;

  open(my $multicm_fh, '<', $multicm_file)
    or die "Cannot open '$multicm_file': $!\n";

  my ($name, $model_id, $profile_content);
  while (my $line = <$multicm_fh>) {
    $profile_content .= $line;

    # Decide on the first field only, so that free text elsewhere on a line
    # (a description mentioning "NAME", a path containing "//") cannot be
    # mistaken for a tag or for the end-of-model marker.
    my ($tag, $value) = split(' ', $line);
    next unless defined $tag;

    if ($tag eq 'NAME') {
      $name = $value;
    } elsif ($tag eq 'ACCESSION') {
      $model_id = $value;
    } elsif ($tag eq '//') {
      # End of profile, let's store it
      unless (defined $model_id) {
        my $label = defined $name ? $name : 'unknown';
        $self->throw("Error loading cm profile [$label]: no ACCESSION line found\n");
      }
      $self->load_cmprofile($profile_content, $model_id, $name);
      # Reset everything so nothing leaks into the next model.
      ($name, $model_id, $profile_content) = ();
    }
  }
  close($multicm_fh);

  return 1;
}
00188 
00189 
# load_cmprofile
#
# Inserts one profile into the nc_profile table through the Compara database
# connection. The row is (model id, name, the 'type' parameter, profile text);
# INSERT IGNORE is used for the statement.
#
# Args    : profile text, model id, model name
# Returns : undef
sub load_cmprofile {
  my ($self, $cm_profile, $model_id, $name) = @_;

  if ($self->debug) {
    print("load profile $model_id\n");
  }

  my $table_name = 'nc_profile';
  my $insert_sql = "INSERT IGNORE INTO $table_name VALUES (?,?,?,?)";

  my $sth = $self->compara_dba->dbc->prepare($insert_sql);
  my @row = ($model_id, $name, $self->param('type'), $cm_profile);
  $sth->execute(@row);
  $sth->finish;

  return undef;
}
00205 
00206 1;