Archive Ensembl HomeArchive Ensembl Home
ResultAnalyser.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::IdMapping::ResultAnalyser - analyse stable Id mapping results
00024 
00025 =head1 SYNOPSIS
00026 
00027   # get a result analyser
00028   my $analyser = Bio::EnsEMBL::IdMapping::ResultAnalyser->new(
00029     -LOGGER => $logger,
00030     -CONF   => $conf,
00031     -CACHE  => $cache
00032   );
00033 
00034   # analyse results
00035   $analyser->analyse( $gene_mappings,
00036     $stable_id_mapper->get_all_stable_id_events('similarity') );
00037 
00038   # write results to file
00039   $analyser->write_results_to_file;
00040 
00041   # create click lists
00042   $analyser->create_clicklist;
00043 
00044   # mapping_summary
00045   $analyser->create_mapping_summary;
00046 
00047 =head1 DESCRIPTION
00048 
00049 This is a utility module which analyses the stable Id mapping results
00050 by providing various sorts of mapping statistics. It also creates
00051 clicklists and a mapping summary.
00052 
00053 =head1 METHODS
00054 
00055   analyse
00056   analyse_db
00057   classify_source_genes_by_type
00058   classify_genes_by_mapping_simple
00059   classify_genes_by_mapping
00060   add
00061   get
00062   get_all_by_subclass
00063   get_all_by_class
00064   get_count_by_subclass
00065   get_count_by_class
00066   get_all_classes
00067   class_key
00068   write_results_to_file
00069   create_clicklist
00070   create_mapping_summary
00071   read_from_file
00072 
00073 =cut
00074 
00075 
00076 package Bio::EnsEMBL::IdMapping::ResultAnalyser;
00077 
00078 use strict;
00079 use warnings;
00080 no warnings 'uninitialized';
00081 
00082 use Bio::EnsEMBL::IdMapping::BaseObject;
00083 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
00084 
00085 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00086 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
00087 
00088 
00089 =head2 analyse
00090 
00091   Arg[1]      : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - the gene
00092                 mappings to analyse
00093   Arg[2]      : Arrayref of Strings - similarity events
00094   Example     : $analyser->analyse($gene_mappings,
00095                   $stable_id_mapper->get_all_stable_id_events('similarity'));
00096   Description : Analyses the results of a stable Id mapping run.
00097   Return type : none
00098   Exceptions  : thrown on wrong or missing arguments
00099   Caller      : general
00100   Status      : At Risk
00101               : under development
00102 
00103 =cut
00104 
# Entry point of the result analysis. Validates both arguments, then
# classifies the source genes first by type and afterwards by mapping status.
#
#   $gene_mappings     - Bio::EnsEMBL::IdMapping::MappingList of gene mappings
#   $similarity_events - arrayref of similarity event strings
#
# Throws if either argument is missing or of the wrong kind.
sub analyse {
  my ($self, $gene_mappings, $similarity_events) = @_;

  # both arguments are mandatory and are checked before any work is done
  if (!$gene_mappings
      or !$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  if (!$similarity_events or ref($similarity_events) ne 'ARRAY') {
    throw("Need a list of similarity events.");
  }

  # group source genes by type (status-logic_name-biotype) ...
  $self->classify_source_genes_by_type;

  # ... then record the mapping status of each of them
  $self->classify_genes_by_mapping($gene_mappings, $similarity_events);
}
00126 
00127 
00128 =head2 classify_source_genes_by_type
00129 
00130   Example     : $analyser->classify_source_genes_by_type;
00131   Description : Classifies source genes by type and adds them to the internal
00132                 datastructure. For the format of the classification string see
00133                 class_key().
00134   Return type : none
00135   Exceptions  : none
00136   Caller      : internal
00137   Status      : At Risk
00138               : under development
00139 
00140 =cut
00141 
# Registers every source gene held in the cache under the subclass 'all' of
# its class (see class_key() for how the class string is built).
sub classify_source_genes_by_type {
  my ($self) = @_;

  my $source_genes = $self->cache->get_by_name('genes_by_id', 'source');

  for my $gene (values %{ $source_genes }) {
    $self->add('source', $self->class_key($gene), 'all', $gene->stable_id);
  }
}
00149 
00150 
00151 =head2 classify_genes_by_mapping_simple
00152 
00153   Arg[1]      : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - gene
00154                 mappings to classify
00155   Example     : $analyser->classify_genes_by_mapping_simple;
00156   Description : Classifies target genes by mapping ('mapped' or 'unmapped').
00157   Return type : none
00158   Exceptions  : thrown on wrong or missing argument
00159   Caller      : This method is not in use at the moment.
00160   Status      : At Risk
00161               : under development
00162 
00163 =cut
00164 
# Classifies every target gene in the cache as 'mapped' or 'unmapped'.
#
#   $gene_mappings - Bio::EnsEMBL::IdMapping::MappingList of gene mappings
#
# Mapped target genes are recorded under the stable ID of the source gene
# they were mapped from; unmapped ones under their own stable ID.
# Throws if $gene_mappings is missing or not a MappingList.
sub classify_genes_by_mapping_simple {
  my $self = shift;
  my $gene_mappings = shift;

  # argument check
  unless ($gene_mappings and
          $gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  # first, create a lookup hash of source genes by target internal ID
  my %source_genes_by_target = ();
  foreach my $e (@{ $gene_mappings->get_all_Entries }) {
    my $s_gene = $self->cache->get_by_key('genes_by_id', 'source', $e->source);
    my $t_gene = $self->cache->get_by_key('genes_by_id', 'target', $e->target);
    $source_genes_by_target{$t_gene->id} = $s_gene;
  }

  # now loop over target genes
  foreach my $t_gene (values %{ $self->cache->get_by_name('genes_by_id', 'target') }) {

    # A gene lacking one of the properties that make up its class key is
    # reported, but still classified (its class key will have empty parts).
    unless ($t_gene->status and $t_gene->logic_name and $t_gene->biotype) {
      $self->logger->warning("Missing data for target gene: ".
        $t_gene->to_string."\n", 1);
    }

    my $class = $self->class_key($t_gene);

    # classify as 'mapped' (keyed by the source gene's stable ID) if a mapping
    # exists for this target gene, otherwise as 'unmapped' (keyed by the
    # target gene's own stable ID)
    if (my $s_gene = $source_genes_by_target{$t_gene->id}) {
      $self->add('target', $class, 'mapped', $s_gene->stable_id);
    } else {
      $self->add('target', $class, 'unmapped', $t_gene->stable_id);
    }

  }
}
00205 
00206 
00207 =head2 classify_genes_by_mapping
00208 
00209   Arg[1]      : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - gene
00210                 mappings to classify
00211   Arg[2]      : Arrayref of Strings - similarity events
00212   Example     : $analyser->classify_genes_by_mapping;
00213   Description : Classifies genes by mapping. Status is
00214                   'mapped' => stable Id was mapped
00215                   'lost_similar' => stable Id not mapped, but there is a
00216                                     similarity entry for the source Id
00217                   'lost_definite' => not mapped and no similarity
00218   Return type : none
00219   Exceptions  : thrown on wrong or missing argument
00220   Caller      : internal (called by analyse())
00221   Status      : At Risk
00222               : under development
00223 
00224 =cut
00225 
# Sub-classifies every source gene as 'mapped', 'lost_similar' or
# 'lost_definite'.
#
#   $gene_mappings     - Bio::EnsEMBL::IdMapping::MappingList of gene mappings
#   $similarity_events - arrayref of tab-separated event strings; only the
#                        first field (a stable ID) is looked at here
#
# Throws if either argument is missing or of the wrong kind.
sub classify_genes_by_mapping {
  my ($self, $gene_mappings, $similarity_events) = @_;

  # argument check
  if (!$gene_mappings
      or !$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  if (!$similarity_events or ref($similarity_events) ne 'ARRAY') {
    throw("Need a list of similarity events.");
  }

  # the source gene of every mapping entry counts as 'mapped'
  for my $entry (@{ $gene_mappings->get_all_Entries }) {
    my $gene =
      $self->cache->get_by_key('genes_by_id', 'source', $entry->source);
    $self->add('source', $self->class_key($gene), 'mapped', $gene->stable_id);
  }

  # remember which stable IDs have a similarity event
  my %has_similarity;
  for my $event (@{ $similarity_events }) {
    my ($event_stable_id) = split("\t", $event);
    $has_similarity{$event_stable_id} = 1;
  }

  # whatever was not mapped is lost; the similarity lookup decides how
  my $source_genes = $self->cache->get_by_name('genes_by_id', 'source');

  for my $gene (values %{ $source_genes }) {
    my $stable_id = $gene->stable_id;
    my $class = $self->class_key($gene);

    next if ($self->get('source', $class, 'mapped', $stable_id));

    my $subclass = $has_similarity{$stable_id} ? 'lost_similar'
                                               : 'lost_definite';
    $self->add('source', $class, $subclass, $stable_id);
  }
}
00274 
00275 
00276 =head2 add
00277 
00278   Arg[1]      : String $dbtype - db type ('source' or 'target')
00279   Arg[2]      : String $class - key identifying a gene type (see class_key())
00280   Arg[3]      : String $subclass - status identifier (e.g. 'mapped', 'lost')
00281   Arg[4]      : String $stable_id - gene stable Id
00282   Arg[5]      : String $val - value (usually 0 or 1)
00283   Example     : $analyser->add('source', 'KNOWN-ensembl-protein_coding',
00284                   'mapped', 'ENSG00002342', 1);
00285   Description : Add a stable Id / property pair to a name/dbtype lookup hash.
00286   
00287                 The datastructure is a bit of a bloat, but is general enough to
00288                 be used as a lookup hash and to generate statistics (counts by
00289                 type) and debug lists (dump by type).
00290   Return type : String - the added value
00291   Exceptions  : none
00292   Caller      : internal
00293   Status      : At Risk
00294               : under development
00295 
00296 =cut
00297 
# Stores $val under dbtype -> class -> subclass -> stable_id in the object's
# own hash and returns the stored value. $val defaults to 1 when undefined.
sub add {
  my $self = shift;
  my ($dbtype, $class, $subclass, $stable_id, $val) = @_;

  # private method: arguments are deliberately not validated (performance)

  # an omitted (or undefined) value means "flag as present"
  $val = 1 if (!defined $val);

  return $self->{$dbtype}{$class}{$subclass}{$stable_id} = $val;
}
00308 
00309 
00310 =head2 get
00311 
00312   Arg[1]      : String $dbtype - db type ('source' or 'target')
00313   Arg[2]      : String $class - key identifying a gene type (see class_key())
00314   Arg[3]      : String $subclass - status identifier (e.g. 'mapped', 'lost')
00315   Arg[4]      : String $stable_id - gene stable Id
00316   Example     : my $mapping_status = $analyser->get('source',
00317                   'KNOWN-ensembl-protein_coding', 'mapped', 'ENSG00002342');
00318   Description : Gets a stable Id mapping status from the internal datastructure.
00319   Return type : String
00320   Exceptions  : none
00321   Caller      : internal
00322   Status      : At Risk
00323               : under development
00324 
00325 =cut
00326 
# Looks up the value stored by add() for the given
# dbtype / class / subclass / stable_id path; undef when nothing is stored.
sub get {
  my $self = shift;
  my ($dbtype, $class, $subclass, $stable_id) = @_;

  # private method: arguments are deliberately not validated (performance)

  return $self->{$dbtype}{$class}{$subclass}{$stable_id};
}
00334 
00335 
00336 =head2 get_all_by_subclass
00337 
00338   Arg[1]      : String $dbtype - db type ('source' or 'target')
00339   Arg[2]      : String $class - key identifying a gene type (see class_key())
00340   Arg[3]      : String $subclass - status identifier (e.g. 'mapped', 'lost')
00341   Example     : my @mapped_stable_ids = @{
00342                   $analyser->get_all_by_subclass(
00343                     'source', 'KNOWN-ensembl-protein_coding',
00344                     'mapped'
00345                   ) };
00346   Description : Gets a list of stable Id for a given subclass.
00347   Return type : Arrayref of String (stable Ids)
00348   Exceptions  : thrown on missing arguments
00349   Caller      : internal
00350   Status      : At Risk
00351               : under development
00352 
00353 =cut
00354 
# Returns an arrayref of the stable IDs stored under the given
# dbtype / class / subclass (empty when nothing is stored there).
# Throws if any of the three arguments is missing.
sub get_all_by_subclass {
  my ($self, $dbtype, $class, $subclass) = @_;

  # argument check
  $dbtype   or throw("Need a dbtype (source|target).");
  $class    or throw("Need a class.");
  $subclass or throw("Need a subclass.");

  # fall back to an empty hash for unknown subclasses
  my $ids = $self->{$dbtype}{$class}{$subclass} || {};

  return [ keys %{ $ids } ];
}
00365 
00366 
00367 =head2 get_all_by_class
00368 
00369   Arg[1]      : String $dbtype - db type ('source' or 'target')
00370   Arg[2]      : String $class - key identifying a gene type (see class_key())
00371   Example     : my @stable_ids = @{
00372                   $analyser->get_all_by_class( 'source',
00373                     'KNOWN-ensembl-protein_coding' ) };
00374   Description : Gets a list of stable Id for a given class.
00375   Return type : Arrayref of String (stable Ids)
00376   Exceptions  : thrown on missing arguments
00377   Caller      : internal
00378   Status      : At Risk
00379               : under development
00380 
00381 =cut
00382 
# Returns an arrayref of the distinct stable IDs stored under any subclass of
# the given dbtype / class. Throws if dbtype or class is missing.
sub get_all_by_class {
  my ($self, $dbtype, $class) = @_;

  # argument check
  $dbtype or throw("Need a dbtype (source|target).");
  $class  or throw("Need a class.");

  my $by_subclass = $self->{$dbtype}{$class} || {};

  # fold all subclasses into one hash so that each stable ID shows up once
  my %union = ();

  for my $subclass (keys %{ $by_subclass }) {
    my $ids = $by_subclass->{$subclass};
    @union{ keys %{ $ids } } = values %{ $ids };
  }

  return [ keys %union ];
}
00400 
00401 
00402 =head2 get_count_by_subclass
00403 
00404   Arg[1]      : String $dbtype - db type ('source' or 'target')
00405   Arg[2]      : String $class - key identifying a gene type (see class_key())
00406   Arg[3]      : String $subclass - status identifier (e.g. 'mapped', 'lost')
00407   Example     : my $num_mapped = $analyser->get_count_by_subclass('source',
00408                   'KNOWN-ensembl-protein_coding', 'mapped');
00409   Description : Gets the number of stable Ids for a given subclass.
00410   Return type : Int
00411   Exceptions  : thrown on missing arguments
00412   Caller      : internal
00413   Status      : At Risk
00414               : under development
00415 
00416 =cut
00417 
# Returns the number of stable IDs stored under the given
# dbtype / class / subclass. Throws if any of the three arguments is missing.
sub get_count_by_subclass {
  my ($self, $dbtype, $class, $subclass) = @_;

  # argument check
  $dbtype   or throw("Need a dbtype (source|target).");
  $class    or throw("Need a class.");
  $subclass or throw("Need a subclass.");

  # an unknown subclass simply counts as zero
  my $ids = $self->{$dbtype}{$class}{$subclass} || {};

  return scalar(keys %{ $ids });
}
00428 
00429 
00430 =head2 get_count_by_class
00431 
00432   Arg[1]      : String $dbtype - db type ('source' or 'target')
00433   Arg[2]      : String $class - key identifying a gene type (see class_key())
00434   Example     : my $num_mapped = $analyser->get_count_by_class('source',
00435                   'KNOWN-ensembl-protein_coding');
00436   Description : Gets the number of stable Ids for a given class.
00437   Return type : Int
00438   Exceptions  : thrown on missing arguments
00439   Caller      : internal
00440   Status      : At Risk
00441               : under development
00442 
00443 =cut
00444 
# Returns the number of stable IDs that get_all_by_class() reports for the
# given dbtype / class. Throws if dbtype or class is missing.
sub get_count_by_class {
  my ($self, $dbtype, $class) = @_;

  # argument check
  $dbtype or throw("Need a dbtype (source|target).");
  $class  or throw("Need a class.");

  my $stable_ids = $self->get_all_by_class($dbtype, $class);

  return scalar(@{ $stable_ids });
}
00454 
00455 
00456 =head2 get_all_classes
00457 
00458   Arg[1]      : String $dbtype - db type ('source' or 'target')
00459   Example     : foreach my $class (@{ $analyser->get_all_classes('source') }) {
00460                   print "$class\n";
00461                 }
00462   Description : Gets a list of classes in the ResultAnalyser.
00463   Return type : Arrayref of String
00464   Exceptions  : thrown on missing argument
00465   Caller      : internal
00466   Status      : At Risk
00467               : under development
00468 
00469 =cut
00470 
# Returns an arrayref with the class keys known for the given dbtype, in
# default (string) sort order. Throws if dbtype is missing.
sub get_all_classes {
  my ($self, $dbtype) = @_;

  # argument check
  $dbtype or throw("Need a dbtype (source|target).");

  my $classes = $self->{$dbtype} || {};
  my @sorted  = sort keys %{ $classes };

  return \@sorted;
}
00479 
00480 
00481 =head2 class_key
00482 
00483   Arg[1]      : Bio::EnsEMBL::IdMapping::TinyGene $gene - a gene object
00484   Example     : my $class = $analyser->class_key($gene);
00485   Description : Generates a key identifying a gene class. This identifier is 
00486                 composed from the gene's status, logic name, and biotype.
00487   Return type : String
00488   Exceptions  : none
00489   Caller      : internal
00490   Status      : At Risk
00491               : under development
00492 
00493 =cut
00494 
# Builds the class identifier of a gene: the results of its status,
# logic_name and biotype methods, in that order, joined with '-'.
sub class_key {
  my ($self, $gene) = @_;

  return join('-', $gene->status, $gene->logic_name, $gene->biotype);
}
00499 
00500 
00501 =head2 write_results_to_file
00502 
00503   Example     : $analyser->write_results_to_file;
00504   Description : Writes the results of the result analysis to a file. This is a 
00505                 human-readable text detailing the mapping statistics.
00506   Return type : none
00507   Exceptions  : none
00508   Caller      : general
00509   Status      : At Risk
00510               : under development
00511 
00512 =cut
00513 
# Writes a human-readable table to 'gene_detailed_mapping_stats.txt' (stats
# directory): one row per source gene class with the count and percentage of
# mapped, lost (similar) and lost (definite) genes.
sub write_results_to_file {
  my ($self) = @_;

  my $fh = $self->get_filehandle('gene_detailed_mapping_stats.txt', 'stats');

  my $header_fmt  = "%-60s%-16s%-16s%-16s\n";
  my $row_fmt     = "%-60s%5.0f (%7s) %5.0f (%7s) %5.0f (%7s)\n";
  my $percent_fmt = "%3.2f%%";

  print {$fh} "Gene detailed mapping results:\n\n";

  print {$fh} sprintf($header_fmt, "Gene type", "mapped", "lost (similar)",
    "lost (definite)");

  print {$fh} ('-' x 108), "\n";

  CLASS:
  for my $class (@{ $self->get_all_classes('source') }) {
    next CLASS if ($class eq 'all');

    my $total = $self->get_count_by_class('source', $class);

    # a class without any genes would cause a division by zero below
    if (!$total) {
      $self->logger->warning("No count found for $class.\n", 1);
      next CLASS;
    }

    # collect (count, percentage) pairs in table column order
    my @cells = ();
    for my $subclass (qw(mapped lost_similar lost_definite)) {
      my $count = $self->get_count_by_subclass('source', $class, $subclass);
      push @cells, $count, sprintf($percent_fmt, $count / $total * 100);
    }

    print {$fh} sprintf($row_fmt, $class, @cells);
  }

  close($fh);
}
00555 
00556 
00557 =head2 create_clicklist
00558 
00559   Example     : $analyser->create_clicklist;
00560   Description : Writes an html file which contains a list of all lost genes,
00561                 with hyperlinks to the appropriate archive website. This is to
00562                 manually check lost genes.
00563   Return type : none
00564   Exceptions  : none
00565   Caller      : general
00566   Status      : At Risk
00567               : under development
00568 
00569 =cut
00570 
# Writes 'genes_lost.html' (stats directory): an HTML page listing, per
# source gene class, all 'lost_similar' and 'lost_definite' stable IDs as
# hyperlinks built from the configured 'urlprefix'. A navigation block at the
# top of the page links to the section of each class/subclass.
sub create_clicklist {
  my $self = shift;

  my $fh = $self->get_filehandle('genes_lost.html', 'stats');

  # start html output
  print $fh qq(<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n);
  print $fh qq(<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-gb"  lang="en-gb">);
  print $fh "<head>\n";
  print $fh "<title>Lost genes ";
  print $fh $self->conf->param('sourcedbname'), ' -&gt; ',
            $self->conf->param('targetdbname');
  print $fh "</title>\n";
  print $fh "</head>\n<body>\n";

  my $prefix = $self->conf->param('urlprefix');
  unless ($prefix) {
    $self->logger->warning("No urlprefix set, clicklists might not be useable.\n", 1);
  }

  # start from empty strings so the output is well defined even when there
  # is no class to report
  my $navigation = '';
  my $clicklist = '';

  foreach my $class (@{ $self->get_all_classes('source') }) {
    next if ($class eq 'all');

    $navigation .= "$class ";
    $clicklist .= "<h1>$class</h1>\n";

    foreach my $subclass (qw(lost_similar lost_definite)) {

      # The navigation link and the section heading must share the same
      # fragment identifier, otherwise the link has no target in the page.
      my $anchor = "${class}-$subclass";

      # navigation
      $navigation .= qq(<a href="#$anchor">$subclass</a> );

      # clicklist
      $clicklist .= qq(<h2 id="$anchor">$subclass</h2>\n);

      foreach my $stable_id (@{ $self->get_all_by_subclass('source', $class, $subclass) }) {
        $clicklist .= qq(<a href="${prefix}$stable_id">$stable_id</a><br />\n);
      }

    }

    $navigation .= "<br />\n";
  }

  # print navigation and clicklist
  print $fh "$navigation\n\n";
  print $fh "$clicklist\n\n";

  # html footer
  print $fh "</body></html>\n";

  close($fh);
}
00626 
00627 
00628 =head2 create_mapping_summary
00629 
00630   Example     : $analyser->create_mapping_summary();
00631   Description : Writes a text file containing a summary of the mapping stats.
00632                 This will be emailed to the genebuilder for evaluation (you will
00633                 have to manually send the email, using the text in
00634                 "mapping_summary.txt" as the template).
00635   Return type : none
00636   Exceptions  : none
00637   Caller      : general
00638   Status      : At Risk
00639               : under development
00640 
00641 =cut
00642 
# Writes 'mapping_summary.txt': a plain-text report consisting of
#   - run time information and the parameters used for the run,
#   - the contents of the per-type mapping stats files (where present),
#   - which data sets were uploaded to the db,
#   - the output directories,
#   - the first 10 deleted genes of type 'known' found in the debug file
#     'genes_lost.txt', each with its URL.
sub create_mapping_summary {
  my $self = shift;

  my $fh = $self->get_filehandle('mapping_summary.txt');

  #
  # title
  #
  print $fh qq(Stable ID mapping results\n);
  print $fh qq(=========================\n\n);

  #
  # timing
  #
  # (string concatenation puts localtime() in scalar context, which yields a
  # human-readable timestamp)
  print $fh "Run at:  ".localtime()."\n";
  print $fh "Runtime: ";
  print $fh $self->logger->runtime, "\n\n";

  #
  # parameters used for this run
  #
  print $fh $self->conf->list_param_values;
  print $fh "\n";

  #
  # mapping stats
  #
  foreach my $type (qw(exon transcript translation gene gene_detailed)) {
    my $filename = "${type}_mapping_stats.txt";

    if ($self->file_exists($filename, 'stats')) {
      print $fh $self->read_from_file($filename, 'stats');
      print $fh "\n\n";
    } else {
      print $fh "No mapping stats found for $type.\n\n";
    }
  }

  #
  # db uploads
  #
  # each entry: [ suffix of the "upload_*" config parameter, label ]
  my @uploads = (
    ['stable_ids'  => 'Stable IDs'],
    ['events'      => 'Stable ID events and mapping session'],
    ['archive'     => 'Gene and peptide archive'],
  );

  my $fmt1 = "%-40s%-20s\n";

  print $fh qq(Data uploaded to db:\n);
  print $fh qq(====================\n\n);

  if ($self->conf->param('dry_run')) {

    print $fh "None (dry run).\n";

  } else {

    foreach my $u (@uploads) {
      my $uploaded = 'no';
      $uploaded = 'yes' if ($self->conf->is_true("upload_".$u->[0]));
      print $fh sprintf($fmt1, $u->[1], $uploaded);
    }

  }

  print $fh "\n";

  #
  # stats and clicklist
  #
  # each entry: [ directory below basedir, description ]
  my @output = (
    ['stats'    => 'statistics (including clicklists of deleted IDs)'],
    ['debug'    => 'detailed mapping output for debugging'],
    ['tables'   => 'data files for db upload'],
  );

  my $fmt2 = "%-20s%-50s\n";

  print $fh qq(\nOutput directories:\n);
  print $fh qq(===================\n\n);

  print $fh sprintf($fmt2, qw(DIRECTORY DESCRIPTION));
  print $fh ('-'x72), "\n";

  print $fh sprintf($fmt2, 'basedir', $self->conf->param('basedir'));

  foreach my $o (@output) {
    # '$basedir/' is single-quoted on purpose: the literal text is printed
    print $fh sprintf($fmt2, '$basedir/'.$o->[0], $o->[1]);
  }

  print $fh "\n";

  #
  # clicklist of first 10 deleted genes
  #
  print $fh qq(\nFirst 10 deleted known genes:\n);
  print $fh qq(=============================\n\n);

  my $in_fh = $self->get_filehandle('genes_lost.txt', 'debug', '<');
  my $prefix = $self->conf->param('urlprefix');

  # Count the genes actually printed rather than the lines read, so that
  # non-'known' entries do not use up the quota of 10. A lexical line variable
  # keeps the caller's $_ untouched.
  my $printed = 0;

  while (defined(my $line = <$in_fh>)) {
    chomp $line;
    my ($stable_id, $type) = split(/\s+/, $line);

    next unless ($type eq 'known');

    print $fh sprintf($fmt2, $stable_id, "${prefix}$stable_id");

    last if (++$printed >= 10);
  }

  close($in_fh);
  close($fh);
}
00760 
00761 
00762 =head2 read_from_file
00763 
00764   Arg[1]      : String $filename - name of file to read
00765   Arg[2]      : (optional) String $append - directory name to append to basedir
00766   Example     : my $stats_text = $analyser->read_from_file('gene_mapping_stats',
00767                   'stats');
00768   Description : Reads mapping stats from a file.
00769   Return type : String
00770   Exceptions  : none
00771   Caller      : internal
00772   Status      : At Risk
00773               : under development
00774 
00775 =cut
00776 
# Reads a whole file and returns its contents as one string.
#
#   $filename - name of the file to read
#   $append   - (optional) passed on to get_filehandle() together with the
#               file name and the read mode '<'
#
# Returns undef (not an empty string) when the file contains no lines.
sub read_from_file {
  my ($self, $filename, $append) = @_;

  my $in_fh = $self->get_filehandle($filename, $append, '<');

  # left undefined on purpose so that an empty file still yields undef
  my $txt;

  # read into a lexical: a bare while (<$in_fh>) would overwrite the
  # caller's global $_
  while (defined(my $line = <$in_fh>)) {
    $txt .= $line;
  }

  close($in_fh);

  return $txt;
}
00792 
00793 1;
00794