Archive Ensembl HomeArchive Ensembl Home
PairwiseSynteny.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::PairwiseSynteny
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $db           = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
00017 my $pairwisesynteny = Bio::EnsEMBL::Compara::RunnableDB::PairwiseSynteny->new
00018   (
00019    -db         => $db,
00020    -input_id   => $input_id,
00021    -analysis   => $analysis
00022   );
00023 $pairwisesynteny->fetch_input(); #reads from DB
00024 $pairwisesynteny->run();
00025 $pairwisesynteny->output();
00026 $pairwisesynteny->write_output(); #writes to DB
00027 
00028 =cut
00029 
00030 
00031 =head1 DESCRIPTION
00032 
00033 This Analysis will take the sequences from a cluster, the cm from
00034 nc_profile and run a profiled alignment, storing the results as
00035 cigar_lines for each sequence.
00036 
00037 =cut
00038 
00039 
00040 =head1 CONTACT
00041 
00042   Contact Albert Vilella on module implementation/design detail: avilella@ebi.ac.uk
00043   Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
00044 
00045 =cut
00046 
00047 
00048 =head1 APPENDIX
00049 
00050 The rest of the documentation details each of the object methods. 
00051 Internal methods are usually preceded with a _
00052 
00053 =cut
00054 
00055 
00056 package Bio::EnsEMBL::Compara::RunnableDB::PairwiseSynteny;
00057 
00058 use strict;
00059 use Getopt::Long;
00060 use Time::HiRes qw(time gettimeofday tv_interval);
00061 use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
00062 
00063 use Bio::EnsEMBL::Hive;
00064 our @ISA = qw(Bio::EnsEMBL::Hive::Process);
00065 
00066 
00067 =head2 fetch_input
00068 
00069     Title   :   fetch_input
00070     Usage   :   $self->fetch_input
00071     Function:   Fetches input data from the database
00072     Returns :   none
00073     Args    :   none
00074 
00075 =cut
00076 
00077 
00078 sub fetch_input {
00079   my( $self) = @_;
00080 
00081   $self->{'clusterset_id'} = 1;
00082 
00083   #create a Compara::DBAdaptor which shares the same DBI handle
00084   #with the Pipeline::DBAdaptor that is based into this runnable
00085   $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new
00086     (
00087      -DBCONN=>$self->db->dbc
00088     );
00089 
00090   # Get the needed adaptors here
00091   $self->{gdba} = $self->{'comparaDBA'}->get_GenomeDBAdaptor;
00092 
00093   $self->get_params($self->parameters);
00094   $self->get_params($self->input_id);
00095 
00096 # # For long parameters, look at analysis_data
00097 #   if($self->{analysis_data_id}) {
00098 #     my $analysis_data_id = $self->{analysis_data_id};
00099 #     my $analysis_data_params = $self->db->get_AnalysisDataAdaptor->fetch_by_dbID($analysis_data_id);
00100 #     $self->get_params($analysis_data_params);
00101 #   }
00102 
00103   $self->fetch_pairwise_blasts;
00104   $self->fetch_coordinates;
00105 
00106   return 1;
00107 }
00108 
00109 
00110 sub get_params {
00111   my $self         = shift;
00112   my $param_string = shift;
00113 
00114   return unless($param_string);
00115   print("parsing parameter string : ",$param_string,"\n") if($self->debug);
00116 
00117   my $params = eval($param_string);
00118   return unless($params);
00119 
00120   if($self->debug) {
00121     foreach my $key (keys %$params) {
00122       print("  $key : ", $params->{$key}, "\n");
00123     }
00124   }
00125 
00126   foreach my $key (qw[param1 gdbs analysis_data_id]) {
00127     my $value = $params->{$key};
00128     $self->{$key} = $value if defined $value;
00129   }
00130 
00131   return;
00132 }
00133 
00134 
00135 =head2 run
00136 
00137     Title   :   run
00138     Usage   :   $self->run
00139     Function:   runs something
00140     Returns :   none
00141     Args    :   none
00142 
00143 =cut
00144 
00145 sub run {
00146   my $self = shift;
00147 
00148   $self->run_pairwise_synteny;
00149 }
00150 
00151 
00152 =head2 write_output
00153 
00154     Title   :   write_output
00155     Usage   :   $self->write_output
00156     Function:   stores something
00157     Returns :   none
00158     Args    :   none
00159 
00160 =cut
00161 
00162 
00163 sub write_output {
00164   my $self = shift;
00165 
00166 }
00167 
00168 
00169 ##########################################
00170 #
00171 # internal methods
00172 #
00173 ##########################################
00174 
00175 1;
00176 
00177 sub run_pairwise_synteny {
00178   my $self = shift;
00179 
00180   return 1;
00181 }
00182 
00183 sub fetch_pairwise_blasts {
00184   my $self = shift;
00185 
00186   my @genome_dbs = @{$self->{gdbs}};
00187   my $genome_db1 = $self->{gdba}->fetch_by_dbID($genome_dbs[0]);
00188   my $genome_db2 = $self->{gdba}->fetch_by_dbID($genome_dbs[1]);
00189 
00190   my $tmp = $self->worker_temp_directory;
00191   unlink </tmp/*.blasts>;
00192   $self->fetch_cross_distances($genome_db1,$genome_db2,1);
00193   $self->fetch_cross_distances($genome_db2,$genome_db1,2);
00194 
00195   return 1;
00196 }
00197 
00198 
00199 sub fetch_cross_distances {
00200   my $self = shift;
00201   my $gdb = shift;
00202   my $gdb2 = shift;
00203   my $filename_prefix = shift;
00204 
00205   my $starttime = time();
00206 
00207   my $gdb_id = $gdb->dbID;
00208   my $species_name = lc($gdb->name);
00209   $species_name =~ s/\ /\_/g;
00210   my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
00211 
00212   my $gdb_id2 = $gdb2->dbID;
00213   my $sql = "SELECT ".
00214             "concat(qmember_id,' ',hmember_id,' ',score) ".
00215             "FROM $tbl_name where hgenome_db_id=$gdb_id2";
00216 
00217   print("$sql\n");
00218   my $sth = $self->dbc->prepare($sql);
00219 
00220   $sth->execute();
00221   printf("%1.3f secs to execute\n", (time()-$starttime));
00222   print("  done with fetch\n");
00223 
00224   my $gdb_filename = $gdb_id if (1 == $filename_prefix);
00225   $gdb_filename = $gdb_id2 if (2 == $filename_prefix);
00226   my $filename = $self->worker_temp_directory . "$gdb_filename.blasts";
00227   # We append all blasts in one file
00228   open FILE, ">>$filename" or die $!;
00229   while ( my $row  = $sth->fetchrow ) {
00230     print FILE "$row\n";
00231   }
00232   $sth->finish;
00233   close FILE;
00234   printf("%1.3f secs to process\n", (time()-$starttime));
00235 
00236   return 1;
00237 }
00238 
00239 sub fetch_coordinates {
00240   my $self = shift;
00241 
00242   my $starttime = time();
00243 
00244   foreach my $genome_db_id (@{$self->{gdbs}}) {
00245     my $sql = "SELECT ".
00246               "concat(m2.member_id,' ',m1.chr_name,' ',m1.chr_start,' ',m1.chr_end,' ',if(m1.chr_strand=1,'-','+')) ".
00247               "FROM member m1, member m2, subset_member sm ".
00248               "WHERE m2.gene_member_id=m1.member_id and ".
00249               "m2.member_id=sm.member_id and ".
00250               "m1.source_name='ENSEMBLGENE' and ".
00251               "m1.genome_db_id=$genome_db_id";
00252 
00253     print("$sql\n");
00254     my $sth = $self->dbc->prepare($sql);
00255 
00256     $sth->execute();
00257     printf("%1.3f secs to execute\n", (time()-$starttime));
00258     print("  done with fetch\n");
00259 
00260     my $filename = $self->worker_temp_directory . "/" . "$genome_db_id.coordinates";
00261     # We append all blasts in one file
00262     open FILE, ">$filename" or die $!;
00263     while ( my $row  = $sth->fetchrow ) {
00264       print FILE "$row\n";
00265     }
00266     $sth->finish;
00267     close FILE;
00268     printf("%1.3f secs to process\n", (time()-$starttime));
00269   }
00270   $DB::single=1;1;
00271   return 1;
00272 }