Archive Ensembl HomeArchive Ensembl Home
NCRecoverEPO.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCRecoverEPO
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 my $db           = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
00017 my $nc_recover_epo = Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCRecoverEPO->new
00018   (
00019    -db         => $db,
00020    -input_id   => $input_id,
00021    -analysis   => $analysis
00022   );
00023 $nc_recover_epo->fetch_input(); #reads from DB
00024 $nc_recover_epo->run();
00025 $nc_recover_epo->output();
00026 $nc_recover_epo->write_output(); #writes to DB
00027 
00028 =cut
00029 
00030 
00031 =head1 DESCRIPTION
00032 
00033 This Analysis will take the sequences from a cluster, the cm from
00034 nc_profile and run a profiled alignment, storing the results as
00035 cigar_lines for each sequence.
00036 
00037 =cut
00038 
00039 
00040 =head1 CONTACT
00041 
00042   Contact Albert Vilella on module implementation/design detail: avilella@ebi.ac.uk
00043   Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
00044 
00045 =cut
00046 
00047 
00048 =head1 APPENDIX
00049 
00050 The rest of the documentation details each of the object methods. 
00051 Internal methods are usually preceded with a _
00052 
00053 =cut
00054 
00055 
00056 package Bio::EnsEMBL::Compara::RunnableDB::ncRNAtrees::NCRecoverEPO;
00057 
00058 use strict;
00059 use Bio::AlignIO;
00060 use Bio::EnsEMBL::Registry;
00061 
00062 use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');
00063 
00064 sub param_defaults {
00065     return {
00066         'clusterset_id'  => 1,
00067     };
00068 }
00069 
00070 =head2 fetch_input
00071 
00072     Title   :   fetch_input
00073     Usage   :   $self->fetch_input
00074     Function:   Fetches input data from the database
00075     Returns :   none
00076     Args    :   none
00077 
00078 =cut
00079 
00080 
00081 sub fetch_input {
00082   my $self = shift @_;
00083 
00084   $self->input_job->transient_error(0);
00085   my $mlss_id    = $self->param('mlss_id')    || die "'mlss_id' is an obligatory numeric parameter\n";
00086   my $epo_db     = $self->param('epo_db')     || die "'epo_db' is an obligatory hash parameter\n";
00087   my $nc_tree_id = $self->param('nc_tree_id') || die "'nc_tree_id' is an obligatory numeric parameter\n";
00088   $self->input_job->transient_error(1);
00089 
00090   $self->param('nc_tree', $self->compara_dba->get_NCTreeAdaptor->fetch_node_by_node_id($nc_tree_id));
00091 
00092   $self->param('member_adaptor', $self->compara_dba->get_MemberAdaptor);
00093   $self->param('nctree_adaptor', $self->compara_dba->get_NCTreeAdaptor);
00094 
00095   my $epo_dba = $self->go_figure_compara_dba($epo_db);
00096   $self->param('epo_gab_adaptor', $epo_dba->get_GenomicAlignBlockAdaptor);
00097   $self->param('epo_mlss_adaptor', $epo_dba->get_MethodLinkSpeciesSetAdaptor);
00098 
00099   my $species_set_adaptor = $self->compara_dba->get_SpeciesSetAdaptor;
00100 
00101 # Do we need two pass in and support two identical sets (epo_gdb and low_cov_gdbs)?
00102 # Aren't they supposed to be different?
00103 
00104   my ($epo_ss) = @{ $species_set_adaptor->fetch_all_by_tag_value('name', 'low-coverage-assembly') };
00105   unless($epo_ss) {
00106     die "Could not fetch a SpeciesSet named 'low-coverage-assembly' from the database\n";
00107   }
00108   $self->param('epo_gdb', {});
00109   foreach my $epo_gdb (@{$epo_ss->genome_dbs}) {
00110       $self->param('epo_gdb')->{$epo_gdb} = 1;
00111   }
00112 
00113   my ($low_cov_ss) = @{ $species_set_adaptor->fetch_all_by_tag_value('name', 'low-coverage-assembly') };
00114   unless($low_cov_ss) {
00115     ($low_cov_ss) = @{ $species_set_adaptor->fetch_all_by_tag_value('name', 'low-coverage') };
00116   }
00117   unless($low_cov_ss) {
00118     die "A SpeciesSet named either 'low-coverage-assembly' or 'low-coverage' must be present in the database to run this analysis\n";
00119   }
00120   $self->param('low_cov_gdbs', {});
00121   foreach my $gdb (@{$low_cov_ss->genome_dbs}) {
00122     $self->param('low_cov_gdbs')->{$gdb->dbID} = 1;
00123   }
00124 
00125 }
00126 
00127 =head2 run
00128 
00129     Title   :   run
00130     Usage   :   $self->run
00131     Function:   runs something
00132     Returns :   none
00133     Args    :   none
00134 
00135 =cut
00136 
00137 sub run {
00138   my $self = shift @_;
00139 
00140   $self->run_ncrecoverepo;
00141   $self->run_low_coverage_best_in_alignment;
00142 }
00143 
00144 
00145 =head2 write_output
00146 
00147     Title   :   write_output
00148     Usage   :   $self->write_output
00149     Function:   stores something
00150     Returns :   none
00151     Args    :   none
00152 
00153 =cut
00154 
00155 
00156 sub write_output {
00157   my $self = shift @_;
00158 
00159   $self->param('predictions_to_add', {});
00160   $self->remove_low_cov_predictions;
00161   $self->add_matching_predictions;
00162 }
00163 
00164 
00165 ##########################################
00166 #
00167 # internal methods
00168 #
00169 ##########################################
00170 
00171 sub run_ncrecoverepo {
00172   my $self = shift;
00173 
00174   my $root_id = $self->param('nc_tree')->node_id;
00175 
00176   my %present_gdbs     = ();
00177   my %absent_gdbs      = ();
00178   my %present_epo_gdbs = ();
00179   
00180   # Find absent gdbs
00181   foreach my $leaf (@{$self->param('nc_tree')->get_all_leaves}) {
00182     $present_gdbs{$leaf->genome_db_id}++;
00183   }
00184   foreach my $present_gdb (keys %present_gdbs) {
00185     if (defined($self->param('epo_gdb')->{$present_gdb})) {
00186       $present_epo_gdbs{$present_gdb} = 1;
00187     }
00188   }
00189   foreach my $epo_gdb (keys %{$self->param('epo_gdb')}) {
00190     if (!defined($present_gdbs{$epo_gdb})) {
00191       $absent_gdbs{$epo_gdb} = 1;
00192     }
00193   }
00194 
00195   my %nc_tree_gene_ids = ();
00196   my $seq_length = 0;
00197 
00198   my $leaves = $self->param('nc_tree')->get_all_leaves;
00199   foreach my $leaf (@$leaves) {
00200     my $description = $leaf->description; $description =~ /Gene\:(\S+)/; my $gene_id = $1;
00201     $nc_tree_gene_ids{$gene_id} = 1;
00202     $seq_length += length($leaf->sequence);
00203   }
00204   
00205   my $avg_seq_length = $seq_length/(scalar @$leaves);
00206 
00207   my %pecan_restricted_gab_hash = ();
00208 
00209   if (keys %absent_gdbs) {
00210     my $pecan_mlss = @{$self->param('epo_mlss_adaptor')->fetch_all_by_method_link_type('PECAN')}->[0];
00211 
00212     foreach my $leaf (@{$self->param('nc_tree')->get_all_leaves}) {
00213       my $gdb_name = $leaf->genome_db->name;
00214       print STDERR "# PECAN $gdb_name\n" if ($self->debug);
00215       next unless(defined($present_epo_gdbs{$leaf->genome_db_id})); # Only for the ones in genomic alignments
00216       my $slice = $leaf->genome_db->db_adaptor->get_SliceAdaptor->fetch_by_transcript_stable_id($leaf->stable_id);
00217       next unless (defined($slice));
00218       my $genomic_align_blocks = $self->param('epo_gab_adaptor')->fetch_all_by_MethodLinkSpeciesSet_Slice($pecan_mlss,$slice);
00219       next unless(0 < scalar(@$genomic_align_blocks));
00220       foreach my $genomic_align_block (@$genomic_align_blocks) {
00221         my $pecan_restricted_gab = $genomic_align_block->restrict_between_reference_positions($slice->start,$slice->end);
00222         next unless (defined($pecan_restricted_gab));
00223         my $gab_start = $pecan_restricted_gab->{restricted_aln_start};
00224         my $gab_end   = $genomic_align_block->length - $pecan_restricted_gab->{restricted_aln_end};
00225         my $boundary = 10;
00226         if (defined($pecan_restricted_gab_hash{$genomic_align_block->dbID})) {
00227           if (
00228               abs($pecan_restricted_gab_hash{$genomic_align_block->dbID}{start} - $gab_start) < $boundary &&
00229               abs($pecan_restricted_gab_hash{$genomic_align_block->dbID}{end}   - $gab_end) < $boundary &&
00230               abs($pecan_restricted_gab_hash{$genomic_align_block->dbID}{slice_length} - $slice->length) < $boundary
00231              ) {
00232             # same genomic alignment region, dont need to go through it again
00233             print STDERR "#   same genomic alignment region, dont need to go through it again\n" if ($self->debug);
00234             next;
00235           }
00236         }
00237         $pecan_restricted_gab_hash{$genomic_align_block->dbID}{start}          = $gab_start;
00238         $pecan_restricted_gab_hash{$genomic_align_block->dbID}{end}            = $gab_end;
00239         $pecan_restricted_gab_hash{$genomic_align_block->dbID}{slice_length}   = $slice->length;
00240         foreach my $genomic_align (@{$pecan_restricted_gab->get_all_GenomicAligns}) {
00241           my $ga_gdb = $genomic_align->genome_db;
00242           next if ($ga_gdb->dbID == $leaf->genome_db_id);
00243           my $core_adaptor = Bio::EnsEMBL::Registry->get_DBAdaptor($ga_gdb->name, "core");
00244           $self->throw("Cannot access core db") unless(defined($core_adaptor));
00245           $core_adaptor->dbc->disconnect_when_inactive(0);
00246           $genomic_align->dnafrag->genome_db->db_adaptor($core_adaptor);
00247           my $other_slice = $genomic_align->get_Slice;
00248           my $other_genome_db_id = $genomic_align->genome_db->dbID;
00249           next unless (defined ($other_slice));
00250           my $genes = $other_slice->get_all_Genes;
00251           my $found_prediction; my $validated_prediction = 0;
00252           print STDERR "#   Other genome: ", $genomic_align->genome_db->name, "\n" if ($self->debug);
00253           foreach my $gene (@$genes) {
00254             my $gene_stable_id = $gene->stable_id;
00255             if (defined($nc_tree_gene_ids{$gene_stable_id})) {
00256               $found_prediction->{$gene_stable_id} = 1;
00257               $validated_prediction = 1;
00258             } elsif ($gene->biotype !~ /coding/) {
00259               $found_prediction->{$gene_stable_id} = 1;
00260               print STDERR "#     $gene_stable_id, biotype:", $gene->biotype, "\n" if ($self->debug);
00261             }
00262           }
00263           if (defined($found_prediction) && 0 == $validated_prediction) {
00264             foreach my $found_gene_stable_id (keys %$found_prediction) {
00265               # Store it in the table
00266               my $sth = $self->compara_dba->dbc->prepare
00267                 ("INSERT IGNORE INTO recovered_member 
00268                            (node_id,
00269                             stable_id,
00270                             genome_db_id) VALUES (?,?,?)");
00271               $sth->execute($root_id,
00272                             $found_gene_stable_id,
00273                             $other_genome_db_id);
00274               $sth->finish;
00275               # See if we can match the RFAM name or RFAM id
00276               my $gene_member = $self->param('member_adaptor')->fetch_by_source_stable_id('ENSEMBLGENE',$found_gene_stable_id);
00277               next unless (defined($gene_member));
00278               my $other_tree = $self->param('nctree_adaptor')->fetch_by_Member_root_id($gene_member);
00279               if (defined($other_tree)) {
00280                 my $other_tree_id = $other_tree->node_id;
00281                 print STDERR  "#     found_description and gene_member, but already in tree $other_tree_id [$root_id]\n" if ($self->debug);
00282                 next;
00283               }
00284               my $description = $gene_member->description;
00285               $description =~ /Acc:(\w+)/;
00286               my $acc_description = $1 if (defined($1));
00287               my $clustering_id = $self->param('nc_tree')->tree->get_tagvalue('clustering_id');
00288               my $model_id = $self->param('nc_tree')->tree->get_tagvalue('model_id');
00289               if ($acc_description eq $clustering_id || $acc_description eq $model_id) {
00290                 $self->param('predictions_to_add')->{$found_gene_stable_id} = 1;
00291               } else {
00292                 print STDERR "#     found_prediction but Acc not mapped: $acc_description [$clustering_id - $model_id]\n" if ($self->debug);
00293               }
00294             }
00295           }
00296           # We don't have a gene prediction here, so we try to predict one
00297           if (!defined($found_prediction)) {
00298             my $start   = $other_slice->start;
00299             my $end     = $other_slice->end;
00300             my $seqname = $other_slice->seq_region_name;
00301             my $sequence = $other_slice->seq; $sequence =~ s/N//g;
00302             my $length = length($sequence);
00303             next if (0 == $length);
00304             next if (($avg_seq_length/$length) > 1.2 ||
00305                      ($avg_seq_length/$length) < 0.8);
00306             my $found_gene_stable_id = "$seqname:$start-$end";
00307             my $sth = $self->compara_dba->dbc->prepare
00308               ("INSERT IGNORE INTO recovered_member 
00309                            (node_id,
00310                             stable_id,
00311                             genome_db_id) VALUES (?,?,?)");
00312             $sth->execute($root_id,
00313                           $found_gene_stable_id,
00314                           $other_genome_db_id);
00315             $sth->finish;
00316             print STDERR "#     no_prediction\n" if ($self->debug);
00317             # Use the RFAM model to see if the sequence is good
00318           }
00319         }
00320       }
00321     }
00322   }
00323 
00324   return 1;
00325 }
00326 
00327 sub run_low_coverage_best_in_alignment {
00328   my $self = shift;
00329 
00330   $self->param('epo_low_cov_gdbs', {});
00331 
00332   my $epo_low_mlss = @{$self->param('epo_mlss_adaptor')->fetch_all_by_method_link_type('EPO_LOW_COVERAGE')}->[0];
00333   foreach my $genome_db (@{$epo_low_mlss->species_set}) {
00334     $self->param('epo_low_cov_gdbs')->{$genome_db->dbID}++;
00335   }
00336 
00337   my %epo_low_restricted_gab_hash = ();
00338   my %epo_low_restricted_gabIDs = ();
00339 
00340   # First round to get the candidate GenomicAlignTrees
00341   foreach my $leaf (@{$self->param('nc_tree')->get_all_leaves}) {
00342     my $gdb_name = $leaf->genome_db->name;
00343     next if (defined($self->param('low_cov_gdbs')->{$leaf->genome_db_id}));
00344 
00345     next unless (defined($self->param('epo_low_cov_gdbs')->{$leaf->genome_db_id}));
00346     my $slice = $leaf->genome_db->db_adaptor->get_SliceAdaptor->fetch_by_transcript_stable_id($leaf->stable_id);
00347     next unless (defined($slice));
00348     my $genomic_align_blocks = $self->param('epo_gab_adaptor')->fetch_all_by_MethodLinkSpeciesSet_Slice($epo_low_mlss,$slice);
00349     next unless(0 < scalar(@$genomic_align_blocks));
00350     print STDERR "# CANDIDATE EPO_LOW_COVERAGE $gdb_name\n" if ($self->debug);
00351     foreach my $genomic_align_block (@$genomic_align_blocks) {
00352       if (!defined($genomic_align_block->dbID)) {
00353         # It's considered 2x in the epo_low_cov, so add to the list and skip
00354         $self->param('epo_low_cov_gdbs')->{$leaf->genome_db_id}++;
00355         next;
00356       }
00357       my $epo_low_restricted_gab = $genomic_align_block->restrict_between_reference_positions($slice->start,$slice->end);
00358       next unless (defined($epo_low_restricted_gab));
00359       my $gab_start = $epo_low_restricted_gab->{restricted_aln_start};
00360       my $gab_end   = $genomic_align_block->length - $epo_low_restricted_gab->{restricted_aln_end};
00361       my $boundary = 10;
00362       $epo_low_restricted_gab_hash{$leaf->genome_db_id}{gabID}          = $genomic_align_block->dbID;
00363       $epo_low_restricted_gab_hash{$leaf->genome_db_id}{start}          = $gab_start;
00364       $epo_low_restricted_gab_hash{$leaf->genome_db_id}{end}            = $gab_end;
00365       $epo_low_restricted_gab_hash{$leaf->genome_db_id}{slice_length}   = $slice->length;
00366       $epo_low_restricted_gab_hash{$leaf->genome_db_id}{gdb_name}       = $gdb_name;
00367       $epo_low_restricted_gabIDs{$genomic_align_block->dbID}++;
00368     }
00369   }
00370   my $max = 0; my $max_gabID;
00371   foreach my $gabID (keys %epo_low_restricted_gabIDs) {
00372     my $count = $epo_low_restricted_gabIDs{$gabID};
00373     if ($count > $max) {$max = $count; $max_gabID = $gabID};
00374   }
00375 
00376   my %low_cov_leaves_pmember_id_slice_to_check_coord_system = ();
00377   my %low_cov_slice_seqs = ();
00378   $self->param('low_cov_leaves_to_delete_pmember_id', {});
00379 
00380   # Second round to get the low-covs on the max_gabID
00381   foreach my $leaf (@{$self->param('nc_tree')->get_all_leaves}) {
00382     my $gdb_name = $leaf->genome_db->name;
00383     next unless (defined($self->param('low_cov_gdbs')->{$leaf->genome_db_id}));
00384     next unless (defined($self->param('epo_low_cov_gdbs')->{$leaf->genome_db_id}));
00385     my $slice = $leaf->genome_db->db_adaptor->get_SliceAdaptor->fetch_by_transcript_stable_id($leaf->stable_id);
00386     $self->throw("Unable to fetch slice for this genome_db leaf: $gdb_name") unless (defined($slice));
00387     $low_cov_slice_seqs{$leaf->genome_db_id}{$leaf->member_id} = $slice;
00388     my $low_cov_genomic_align_blocks = $self->param('epo_gab_adaptor')->fetch_all_by_MethodLinkSpeciesSet_Slice($epo_low_mlss,$slice);
00389     unless (0 < scalar(@$low_cov_genomic_align_blocks)) {
00390       # $DB::single=1;1;
00391       $self->param('low_cov_leaves_to_delete_pmember_id')->{$leaf->member_id} = $leaf->gene_member->stable_id;
00392       next;
00393     }
00394     print STDERR "# EPO_LOW_COVERAGE $gdb_name\n" if ($self->debug);
00395     foreach my $low_cov_genomic_align_block (@$low_cov_genomic_align_blocks) {
00396       unless ($low_cov_genomic_align_block->{original_dbID} == $max_gabID) {
00397         # We delete this leaf because it's a low_cov slice that is not in the epo_low_cov, so it's the best in alignment
00398         # $DB::single=1;1;
00399         $self->param('low_cov_leaves_to_delete_pmember_id')->{$leaf->member_id} = $leaf->gene_member->stable_id;
00400       } else {
00401         $low_cov_leaves_pmember_id_slice_to_check_coord_system{$leaf->member_id} = $leaf->gene_member->stable_id;
00402       }
00403     }
00404   }
00405 
00406   my %low_cov_same_slice = ();
00407 
00408   foreach my $genome_db_id (keys %low_cov_slice_seqs) {
00409     my @member_ids = keys %{$low_cov_slice_seqs{$genome_db_id}};
00410     next if (2 > scalar @member_ids);
00411     while (my $member_id1 = shift (@member_ids)) {
00412       foreach my $member_id2 (@member_ids) {
00413         my $slice1 = $low_cov_slice_seqs{$genome_db_id}{$member_id1};
00414         my $coord_level1 = $slice1->coord_system->is_top_level;
00415         my $slice2 = $low_cov_slice_seqs{$genome_db_id}{$member_id2};
00416         my $coord_level2 = $slice2->coord_system->is_top_level;
00417         if (0 < abs($coord_level1-$coord_level2)) {
00418           if ($coord_level2 < $coord_level1) {
00419             my $temp_slice = $slice1; $slice1 = $slice2; $slice2 = $temp_slice;
00420             my $temp_member_id = $member_id1; $member_id1 = $member_id2; $member_id2 = $temp_member_id;
00421           }
00422         }
00423         my $mapped_slice2 = @{$slice2->project($slice1->coord_system->name)}->[0];
00424         next unless(defined($mapped_slice2)); # no projection, so pair of slices are different
00425         my $proj_slice2 = $mapped_slice2->to_Slice;
00426         if ($slice1->seq_region_name eq $proj_slice2->seq_region_name &&
00427             $slice1->start           eq $proj_slice2->start           &&
00428             $slice1->end             eq $proj_slice2->end) {
00429           $low_cov_same_slice{$member_id1} = $member_id2;
00430         }
00431       }
00432     }
00433   }
00434 
00435   foreach my $member_id1 (keys %low_cov_same_slice) {
00436     my $member_id2 = $low_cov_same_slice{$member_id1};
00437     if (defined ($low_cov_leaves_pmember_id_slice_to_check_coord_system{$member_id2})) {
00438       # We found this slice in the genomic alignment, but it's same
00439       # slice as another higher rank slice, so goes to the delete list
00440       my $stable_id2 = $low_cov_leaves_pmember_id_slice_to_check_coord_system{$member_id2};
00441       # $DB::single=1;1;
00442       $self->param('low_cov_leaves_to_delete_pmember_id')->{$member_id2} = $stable_id2;
00443     }
00444   }
00445 }
00446 
00447 sub remove_low_cov_predictions {
00448   my $self = shift;
00449   my $root_id = $self->param('nc_tree')->node_id;
00450 
00451   # Remove low cov members that are not best in alignment
00452   foreach my $leaf (@{$self->param('nc_tree')->get_all_leaves}) {
00453     if(my $removed_stable_id = $self->param('low_cov_leaves_to_delete_pmember_id')->{$leaf->member_id}) {
00454       print STDERR "removing low_cov prediction $removed_stable_id\n" if($self->debug);
00455       my $removed_genome_db_id = $leaf->genome_db_id;
00456       $leaf->disavow_parent;
00457       $self->param('nctree_adaptor')->delete_flattened_leaf($leaf);
00458       my $sth = $self->compara_dba->dbc->prepare
00459         ("INSERT IGNORE INTO removed_member 
00460                            (node_id,
00461                             stable_id,
00462                             genome_db_id) VALUES (?,?,?)");
00463       $sth->execute($root_id,
00464                     $removed_stable_id,
00465                     $removed_genome_db_id);
00466       $sth->finish;
00467     }
00468   }
00469   #calc residue count total
00470   my $leafcount = scalar(@{$self->param('nc_tree')->get_all_leaves});
00471   $self->param('nc_tree')->tree->store_tag('gene_count', $leafcount);
00472 
00473   return 1;
00474 }
00475 
00476 sub add_matching_predictions {
00477   my $self = shift;
00478 
00479   # Insert the members that are found new and have matching Acc
00480   foreach my $gene_stable_id_to_add (keys %{$self->param('predictions_to_add')}) {
00481     my $gene_member = $self->param('member_adaptor')->fetch_by_source_stable_id('ENSEMBLGENE',$gene_stable_id_to_add);
00482     # Incorporate this member into the cluster
00483     my $node = new Bio::EnsEMBL::Compara::GeneTreeNode;
00484     $self->param('nc_tree')->add_child($node);
00485     #leaves are GeneTreeNode objects, bless to make into GeneTreeMember objects
00486     bless $node, "Bio::EnsEMBL::Compara::GeneTreeMember";
00487 
00488     #the building method uses member_id's to reference unique nodes
00489     #which are stored in the node_id value, copy to member_id
00490     $node->member_id($gene_member->get_canonical_peptide_Member->member_id);
00491     $node->method_link_species_set_id($self->param('mlss_id'));
00492     # We won't do the store until the end, otherwise it will affect the main loop
00493     print STDERR "adding matching prediction $gene_stable_id_to_add\n" if($self->debug);
00494     $self->param('nctree_adaptor')->store($node);
00495   }
00496 
00497   #calc residue count total
00498   my $leafcount = scalar(@{$self->param('nc_tree')->get_all_leaves});
00499   $self->param('nc_tree')->tree->store_tag('gene_count', $leafcount);
00500 
00501   return 1;
00502 }
00503 
00504 1;