Archive Ensembl HomeArchive Ensembl Home
GeneStoreNCMembers.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::GeneStoreNCMembers
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $db      = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
00017 my $g_load_members = Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::GeneStoreNCMembers->new (
00018                                                     -db      => $db,
00019                                                     -input_id   => $input_id
00020                                                     -analysis   => $analysis );
00021 $g_load_members->fetch_input(); #reads from DB
00022 $g_load_members->run();
00023 $g_load_members->output();
00024 $g_load_members->write_output(); #writes to DB
00025 
00026 =cut
00027 
00028 =head1 DESCRIPTION
00029 
00030 Create members from a given ncRNA gene (both ncRNA members and gene member).
00031 
00032 =cut
00033 
00034 =head1 CONTACT
00035 
00036 Describe contact details here
00037 
00038 =cut
00039 
00040 =head1 APPENDIX
00041 
00042 The rest of the documentation details each of the object methods.
00043 Internal methods are usually preceded with a _
00044 
00045 =cut
00046 
00047 package Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::GeneStoreNCMembers;
00048 
00049 use strict;
00050 use Bio::EnsEMBL::Compara::Member;
00051 use Bio::EnsEMBL::Compara::Subset;
00052 
00053 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00054 
00055 sub param_defaults {
00056     return {
00057         'store_genes'  => 1,    # whether genes are also stored as members
00058     };
00059 }
00060 
00061 
00062 =head2 fetch_input
00063 
00064     Read the parameters and set up all necessary objects.
00065 
00066 =cut
00067 
00068 sub fetch_input {
00069     my $self = shift @_;
00070 
00071     $self->input_job->transient_error(0);
00072     my $genome_db_id = $self->param('genome_db_id') || die "'genome_db_id' parameter is an obligatory one, please specify";
00073     my $stable_id = $self->param('stable_id')       || die "'stable_id' parameter is an obligatory one, please specify";
00074     $self->input_job->transient_error(1);
00075 
00076         # fetch the Compara::GenomeDB object for the genome_db_id
00077     my $genome_db = $self->compara_dba->get_GenomeDBAdaptor->fetch_by_dbID($genome_db_id) or die "Could not fetch genome_db with id=$genome_db_id";
00078     $self->param('genome_db', $genome_db);
00079   
00080         # using genome_db_id connect to external core database
00081     my $core_db = $genome_db->db_adaptor() or die "Can't connect to genome database for id=$genome_db_id";
00082     $self->param('core_db', $core_db);
00083   
00084     $self->param('to_ncrna_subset', []);
00085     $self->param('to_gene_subset',  []);
00086 }
00087 
00088 
00089 =head2 run
00090 
00091     Fetch a particular gene and create members from its' non-coding transcript(s?) and the gene itself
00092 
00093 =cut
00094 
00095 sub run {
00096     my $self = shift @_;
00097 
00098     my $core_db     = $self->param('core_db');
00099     my $stable_id   = $self->param('stable_id');
00100 
00101     $self->compara_dba->dbc->disconnect_when_inactive(0);
00102     $core_db->dbc->disconnect_when_inactive(0);
00103 
00104     my $gene_adaptor = $core_db->get_GeneAdaptor or die "Could not create the core GeneAdaptor";
00105 
00106     my $gene = $gene_adaptor->fetch_by_stable_id( $stable_id ) or die "Could not fetch gene with stable_id '$stable_id'";
00107 
00108         # Store gene:
00109     $self->store_ncrna_gene($gene);
00110 
00111     $self->compara_dba->dbc->disconnect_when_inactive(1);
00112     $core_db->dbc->disconnect_when_inactive(1);
00113 }
00114 
00115 
00116 =head2 write_output
00117 
00118     Dataflow the (subset_id,member_id) pairs if the corresponding subset_ids were passed as parameters.
00119     These pairs will end up in the subset_member table if everything is wired correctly.
00120 
00121 =cut
00122 
00123 sub write_output {
00124     my $self = shift @_;
00125 
00126     if(my $ncrna_subset_id = $self->param('ncrna_subset_id')) {
00127 
00128         foreach my $member_id (@{$self->param('to_ncrna_subset')}) {
00129             $self->dataflow_output_id({
00130                 'subset_id' => $ncrna_subset_id,
00131                 'member_id' => $member_id,
00132             }, 3);
00133         }
00134     }
00135 
00136     if(my $gene_subset_id = $self->param('gene_subset_id')) {
00137 
00138         foreach my $member_id (@{$self->param('to_gene_subset')}) {
00139             $self->dataflow_output_id({
00140                 'subset_id' => $gene_subset_id,
00141                 'member_id' => $member_id,
00142             }, 4);
00143         }
00144     }
00145 }
00146 
00147 
00148 ######################################
00149 #
00150 # subroutines
00151 #
00152 #####################################
00153 
00154 
00155 sub store_ncrna_gene {
00156     my ($self, $gene) = @_;
00157 
00158     my $longest_ncrna_member;
00159     my $max_ncrna_length = 0;
00160     my $gene_member;
00161     my $gene_member_not_stored = 1;
00162 
00163     my $member_adaptor = $self->compara_dba->get_MemberAdaptor();
00164 
00165     my $pseudo_stableID_prefix = $self->param('pseudo_stableID_prefix');
00166 
00167     if($pseudo_stableID_prefix) {
00168         $gene->stable_id($pseudo_stableID_prefix ."G_". $gene->dbID);
00169     }
00170 
00171     TRANSCRIPT: foreach my $transcript (@{$gene->get_all_Transcripts}) {
00172         if (defined $transcript->translation) {
00173             warn("Translation exists for ncRNA transcript ", $transcript->stable_id, "(dbID=",$transcript->dbID.")\n");
00174             next;
00175         }
00176 
00177         if($pseudo_stableID_prefix) {
00178             $transcript->stable_id($pseudo_stableID_prefix ."T_". $transcript->dbID);
00179         }
00180 
00181         print("     transcript " . $transcript->stable_id ) if($self->debug);
00182 
00183         my $fasta_description = $self->fasta_description($gene, $transcript) or next TRANSCRIPT;
00184 
00185         my $ncrna_member = Bio::EnsEMBL::Compara::Member->new_from_transcript(
00186             -transcript  => $transcript,
00187             -genome_db   => $self->param('genome_db'),
00188             -translate   => 'ncrna',
00189             -description => $fasta_description,
00190         );
00191 
00192         print(" => member " . $ncrna_member->stable_id) if($self->debug);
00193 
00194         my $transcript_spliced_seq = $transcript->spliced_seq;
00195 
00196             # store gene_member here only if at least one ncRNA is to be loaded for the gene.
00197         if($self->param('store_genes') and $gene_member_not_stored) {
00198             print("     gene       " . $gene->stable_id ) if($self->debug);
00199 
00200             $gene_member = Bio::EnsEMBL::Compara::Member->new_from_gene(
00201                 -gene      => $gene,
00202                 -genome_db => $self->param('genome_db'),
00203             );
00204             print(" => member " . $gene_member->stable_id) if($self->debug);
00205 
00206             eval {
00207                 $member_adaptor->store($gene_member);
00208                 print(" : stored") if($self->debug);
00209             };
00210 
00211             push @{$self->param('to_gene_subset')}, $gene_member->dbID;
00212             print("\n") if($self->debug);
00213             $gene_member_not_stored = 0;
00214         }
00215 
00216         $member_adaptor->store($ncrna_member);
00217         $member_adaptor->store_gene_peptide_link($gene_member->dbID, $ncrna_member->dbID);
00218         print(" : stored\n") if($self->debug);
00219 
00220         if(length($transcript_spliced_seq) > $max_ncrna_length) {
00221             $max_ncrna_length     = length($transcript_spliced_seq);
00222             $longest_ncrna_member = $ncrna_member;
00223         }
00224     }
00225 
00226     if($longest_ncrna_member) {
00227         push @{$self->param('to_ncrna_subset')}, $longest_ncrna_member->dbID;
00228     }
00229 }
00230 
00231 sub fasta_description {
00232   my ($self, $gene, $transcript) = @_;
00233   my $acc = 'NULL'; my $biotype = undef;
00234   $DB::single=1;1;
00235   eval { $acc = $transcript->display_xref->primary_id;};
00236   unless ($acc =~ /RF00/) {
00237     $biotype = $transcript->biotype;
00238     if ($biotype =~ /miRNA/) {
00239       my @exons = @{$transcript->get_all_Exons};
00240       $self->throw("unexpected miRNA with more than one exon") if (1 < scalar @exons);
00241       my $exon = $exons[0];
00242       my @supporting_features = @{$exon->get_all_supporting_features};
00243       if (scalar(@supporting_features)!=1) {
00244         warn "unexpected miRNA supporting features";
00245         next;
00246       }
00247       my $supporting_feature = $supporting_features[0];
00248       eval { $acc = $supporting_feature->hseqname; };
00249     } elsif ($biotype =~ /snoRNA/) {
00250       eval { $acc = $transcript->external_name; };
00251       #     } elsif ($biotype =~ /Mt_tRNA/) { # wont deal with these at the moment
00252       #       $acc = 'RF00005';
00253     } elsif ($biotype =~ /Mt_rRNA/) {
00254       # $acc = $biotype;
00255     } else {
00256       # We just leave it as NULL and will skip it in RFAMClassify
00257     }
00258   }
00259   my $description = "Transcript:" . $transcript->stable_id .
00260                     " Gene:" .      $gene->stable_id .
00261                     " Chr:" .       $gene->seq_region_name .
00262                     " Start:" .     $gene->seq_region_start .
00263                     " End:" .       $gene->seq_region_end.
00264                     " Acc:" .       $acc;
00265   print STDERR "Description... $description\n" if ($self->debug);
00266   return $description;
00267 }
00268 
00269 1;