Archive Ensembl HomeArchive Ensembl Home
CreateAlignmentNetsJobs.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =head1 NAME
00020 
00021 Bio::EnsEMBL::Compara::RunnableDB::PairAligner::CreateAlignmentNetsJobs
00022 
00023 =cut
00024 
00025 =head1 SYNOPSIS
00026 
00027 =cut
00028 
00029 =head1 DESCRIPTION
00030 
00031 =cut
00032 
00033 =head1 APPENDIX
00034 
00035 The rest of the documentation details each of the object methods.
00036 Internal methods are usually preceded with a _
00037 
00038 =cut
00039 
00040 package Bio::EnsEMBL::Compara::RunnableDB::PairAligner::CreateAlignmentNetsJobs;
00041 
00042 use strict;
00043 
00044 use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
00045 use Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor;
00046 
00047 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00048 
00049 my $DEFAULT_DUMP_MIN_SIZE = 11500000;
00050 
00051 sub fetch_input {
00052   my $self = shift;
00053 
00054   if (defined ($self->param('query_collection_name'))) {
00055       $self->param('collection_name', $self->param('query_collection_name'));
00056   }
00057 
00058   # get DnaCollection of query
00059   throw("must specify 'collection_name' to identify DnaCollection of query") 
00060     unless(defined($self->param('collection_name')));
00061   $self->param('collection', $self->compara_dba->get_DnaCollectionAdaptor->
00062            fetch_by_set_description($self->param('collection_name')));
00063   throw("unable to find DnaCollection with name : ". $self->param('collection_name'))
00064     unless(defined($self->param('collection')));
00065 
00066   #get the MethodLinkSpeciesSet
00067   throw("Must specify 'mlss_id' to identify a MethodLinkSpeciesSet") unless (defined($self->param('input_mlss_id')));
00068   $self->param('method_link_species_set', $self->compara_dba->get_MethodLinkSpeciesSetAdaptor->fetch_by_dbID($self->param('input_mlss_id')));
00069 
00070   throw("unable to find method_link_species_set for mlss_id=",$self->param('input_mlss_id')) unless(defined($self->param('method_link_species_set')));
00071 
00072   $self->print_params;
00073 
00074   return 1;
00075 }
00076 
00077 
00078 sub run {
00079   my $self = shift;
00080   return 1;
00081 }
00082 
00083 
00084 sub write_output {
00085   my $self = shift;
00086   $self->createAlignmentNetsJobs();
00087 
00088   return 1;
00089 }
00090 
00091 sub print_params {
00092   my $self = shift;
00093 
00094   printf(" params:\n");
00095   printf("   method_link_species_set_id : %d\n", $self->param('method_link_species_set')->dbID);
00096   printf("   collection           : (%d) %s\n", 
00097          $self->param('collection')->dbID, $self->param('collection')->description);
00098 }
00099 
00100 
00101 sub createAlignmentNetsJobs {
00102   my $self = shift;
00103 
00104   my $query_dna_list  = $self->param('collection')->get_all_dna_objects;
00105 
00106   my $count=0;
00107 #  my $sql ="select group_id,min(dnafrag_start) as min,max(dnafrag_end) as max from genomic_align ga, genomic_align_group gag where ga.genomic_align_id=gag.genomic_align_id and ga.method_link_species_set_id = ? and ga.dnafrag_id= ? and gag.type = ? group by group_id order by min asc,max asc";
00108 
00109   my $sql = "select ga.dnafrag_start, ga.dnafrag_end from genomic_align ga, genomic_align_block gab where ga.genomic_align_block_id=gab.genomic_align_block_id and ga.method_link_species_set_id= ? and ga.dnafrag_id= ? order by dnafrag_start asc, dnafrag_end asc";
00110 
00111   my $sth = $self->compara_dba->dbc->prepare($sql);
00112 
00113   foreach my $qy_dna_object (@{$query_dna_list}) {
00114     my $qy_dnafrag_id = $qy_dna_object->dnafrag->dbID;
00115     $sth->execute($self->param('method_link_species_set')->dbID, $qy_dnafrag_id);
00116     my ($dnafrag_start,$dnafrag_end);
00117     $sth->bind_columns(\$dnafrag_start, \$dnafrag_end);
00118     my ($slice_start,$slice_end);
00119     my @genomic_slices;
00120     while ($sth->fetch()) {
00121       unless (defined $slice_start) {
00122         ($slice_start,$slice_end) = ($dnafrag_start, $dnafrag_end);
00123         next;
00124       }
00125       if ($dnafrag_start > $slice_end) {
00126         push @genomic_slices, [$slice_start,$slice_end];
00127         ($slice_start,$slice_end) = ($dnafrag_start, $dnafrag_end);
00128       } else {
00129         if ($dnafrag_end > $slice_end) {
00130           $slice_end = $dnafrag_end;
00131         }
00132       }
00133     }
00134     $sth->finish;
00135 
00136     # Skip if no alignments are found on this slice
00137     next if (!defined $slice_start || !defined $slice_end);
00138 
00139     push @genomic_slices, [$slice_start,$slice_end];
00140 
00141     my @grouped_genomic_slices;
00142     undef $slice_start;
00143     undef $slice_end;
00144     my $max_slice_length = 500000;
00145     while (my $genomic_slices = shift @genomic_slices) {
00146       my ($start, $end) = @{$genomic_slices};
00147       unless (defined $slice_start) {
00148         ($slice_start,$slice_end) = ($start, $end);
00149         next;
00150       }
00151       my $slice_length = $slice_end - $slice_start + 1;
00152       my $length = $end - $start + 1;
00153       if ($slice_length > $max_slice_length || $slice_length + $length > $max_slice_length) {
00154         push @grouped_genomic_slices, [$slice_start,$slice_end];
00155         ($slice_start,$slice_end) = ($start, $end);
00156       } else {
00157         $slice_end = $end;
00158       }
00159     }
00160     push @grouped_genomic_slices, [$slice_start,$slice_end];
00161 
00162     while (my $genomic_slices = shift @grouped_genomic_slices) {
00163       my ($start, $end) = @{$genomic_slices};
00164       my $input_hash = {};
00165       $input_hash->{'start'} = $start;
00166       $input_hash->{'end'} = $end;
00167       $input_hash->{'DnaFragID'} = $qy_dnafrag_id;
00168       $input_hash->{'input_mlss_id'} = $self->param('method_link_species_set')->dbID;
00169       $input_hash->{'output_mlss_id'} = $self->param('output_mlss_id');
00170 
00171       $self->dataflow_output_id($input_hash, 2);
00172       $count++;
00173     }
00174   }
00175   if ($count == 0) {
00176       # No alignments have been found.
00177       $self->input_job->autoflow(0);
00178       print "No jobs created\n";
00179   } else {
00180     printf("created %d jobs for AlignmentNets\n", $count);
00181   }
00182 
00183   #
00184   #Flow to 'set_internal_ids' and 'update_max_alignment_length_after_net' on branch 1
00185   #
00186   my $output_hash = {};
00187   %$output_hash = ('method_link_species_set_id' => $self->param('output_mlss_id'));
00188   $self->dataflow_output_id($output_hash,1);
00189 
00190 }
00191 
00192 1;