Archive Ensembl HomeArchive Ensembl Home
FindRepeatGabs.pm
Go to the documentation of this file.
00001 package Bio::EnsEMBL::Compara::Production::EPOanchors::HMMer::FindRepeatGabs;
00002 
00003 use strict;
00004 use Data::Dumper;
00005 use Bio::EnsEMBL::Registry;
00006 
00007 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00008 
00009 sub fetch_input {
00010     my ($self) = @_;
00011         my $self_dba = new Bio::EnsEMBL::Compara::DBSQL::DBAdaptor( 
00012                                 -host => $self->dbc->host, 
00013                                 -pass => $self->dbc->password, 
00014                                 -port => $self->dbc->port, 
00015                                 -user => $self->dbc->username,
00016                                 -dbname => $self->dbc->dbname);
00017         $self->param('self_dba', $self_dba);
00018 }
00019 
00020 sub run {
00021     my ($self) = @_;
00022     my $self_dba = $self->param('self_dba');
00023     my $ga_dnafrag_sth = $self_dba->dbc->prepare('SELECT DISTINCT(ga.dnafrag_id) FROM genomic_align ga INNER JOIN dnafrag df ON ' .
00024                 'ga.dnafrag_id = df.dnafrag_id WHERE df.genome_db_id = ?');
00025     $ga_dnafrag_sth->execute( $self->param('dbID') );
00026     my $comp_files = join('/', $self->param('repeat_dump_dir'), $self->param('species'), $self->param('assembly'));
00027     my $repeats_file = $comp_files . ".repeats";    
00028     foreach my $dnafrag_id (@{ $ga_dnafrag_sth->fetchall_arrayref }){
00029         $dnafrag_id = $dnafrag_id->[0];
00030         my $sth = $self_dba->dbc->prepare('SELECT dnafrag_id, dnafrag_start, dnafrag_end, genomic_align_block_id FROM genomic_align ' .
00031                     'WHERE dnafrag_id = ? ORDER BY dnafrag_id, dnafrag_start');
00032         $sth->execute($dnafrag_id);
00033         my $gab_file = $comp_files . ".gabs.$dnafrag_id";
00034         open(IN, ">$gab_file") or die $!;
00035         foreach my $gab(@{ $sth->fetchall_arrayref }){
00036             print join("\t", @$gab), "\n";
00037             print IN join("\t", @$gab), "\n";
00038         }
00039         close(IN);
00040         my $subset_rep_file = $comp_files . ".reps.$dnafrag_id";
00041         my $subset_repeats_cmd = "grep \"^$dnafrag_id\" $repeats_file > $subset_rep_file";
00042         system($subset_repeats_cmd);
00043         my $sth2 = $self_dba->dbc->prepare('UPDATE genomic_align_block SET score = -10000 WHERE genomic_align_block_id = ?');
00044         my $cmd = $self->param('find_overlaps') . " $subset_rep_file $gab_file --filter | cut -f4 | sort | uniq";
00045         my $filter_fh;
00046         open( $filter_fh, "$cmd |" ) or throw("Error opening find_overlaps command: $? $!");
00047         while(my $hit_gab = <$filter_fh>){
00048             $sth2->execute($hit_gab);
00049         }
00050     
00051         unlink($gab_file);
00052         unlink($subset_rep_file);
00053     }
00054 }
00055 
00056 1;
00057