Archive Ensembl Home
StableIdMapper.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 =head1 SYNOPSIS
00024 
00025 =head1 DESCRIPTION
00026 
00027 =head1 METHODS
00028 
00029 =cut
00030 
00031 package Bio::EnsEMBL::IdMapping::StableIdMapper;
00032 
00033 use strict;
00034 use warnings;
00035 no warnings 'uninitialized';
00036 
00037 use Bio::EnsEMBL::IdMapping::BaseObject;
00038 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
00039 
00040 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00041 use Bio::EnsEMBL::Utils::ScriptUtils qw(inject path_append);
00042 use Bio::EnsEMBL::IdMapping::ScoredMappingMatrix;
00043 use POSIX qw(strftime);
00044 
00045 
00046 # instance variables
00047 my %debug_mappings;
00048 
00049 
# Constructor.
#
# Builds the object through the parent class constructor, then loads the
# stable ID generator plugin named by the 'plugin_stable_id_generator'
# configuration parameter (defaulting to the EnsemblGeneric generator) and
# stores an instance of it via stable_id_generator().
#
# Note from the original author: custom generators should extend
# Bio::EnsEMBL::IdMapping::BaseObject and additionally implement
# initial_stable_id(), increment_stable_id() and calculate_version().
#
# Arguments: passed through unchanged to the parent constructor.
# Returns  : the new object.
sub new {
  my ($caller, @args) = @_;

  my $class = ref($caller) || $caller;
  my $self  = $class->SUPER::new(@args);

  # pick the configured plugin class, or fall back to the generic generator
  my $generator_class = $self->conf->param('plugin_stable_id_generator');
  $generator_class ||=
    'Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric';

  $self->logger->debug("Using $generator_class to generate stable Ids.\n");
  inject($generator_class);

  # instantiate the generator with this object's logger, conf and cache
  my $generator = $generator_class->new(
    -LOGGER => $self->logger,
    -CONF   => $self->conf,
    -CACHE  => $self->cache,
  );
  $self->stable_id_generator($generator);

  return $self;
}
00075 
00076 
# Creates the mapping session for this run and dumps the mapping_session
# table (old rows plus the new session) to 'mapping_session.txt'.
#
# The method is a no-op when mapping_session_date() is already set, so it is
# safe to call it once per object type.
#
# Steps:
#   1. record the session date (epoch seconds) and its formatted form;
#   2. take the mapping_session_id from the 'mapping_session_id' configuration
#      parameter, or else use MAX(mapping_session_id) of the source database
#      plus one;
#   3. copy every row of the source database's mapping_session table to the
#      output file;
#   4. append a row describing the new session (database names, schema
#      versions and default assemblies of source and target, session date).
#
# Throws if the output file cannot be closed cleanly, since buffered write
# errors only surface at close time and the file would then be incomplete.
sub generate_mapping_session {
  my $self = shift;

  # only run this method once
  return if ($self->mapping_session_date);

  $self->logger->info("Generating new mapping_session...\n");

  $self->mapping_session_date(time);
  $self->mapping_session_date_fmt(strftime("%Y-%m-%d %T",
    localtime($self->mapping_session_date)));

  my $s_dba = $self->cache->get_DBAdaptor('source');
  my $s_dbh = $s_dba->dbc->db_handle;
  my $t_dba = $self->cache->get_DBAdaptor('target');
  my $t_dbh = $t_dba->dbc->db_handle;

  # check if mapping_session_id was manually set by the configuration
  my $mapping_session_id = $self->conf->param('mapping_session_id');

  if ($mapping_session_id) {

    $self->logger->debug("Using manually configured mapping_session_id $mapping_session_id\n", 1);

  } else {

    # calculate mapping_session_id from db
    my $sql = qq(SELECT MAX(mapping_session_id) FROM mapping_session);
    $mapping_session_id = $self->fetch_value_from_db($s_dbh, $sql);

    unless ($mapping_session_id) {
      $self->logger->debug("No previous mapping_session found.\n", 1);
    }

    # increment last mapping_session_id (an undefined value becomes 1)
    $mapping_session_id++;

    $self->logger->debug("Using mapping_session_id $mapping_session_id\n", 1);
  }

  $self->mapping_session_id($mapping_session_id);

  # write old mapping_session table to a file
  my $i = 0;
  my $fh = $self->get_filehandle('mapping_session.txt', 'tables');

  my $sth1 = $s_dbh->prepare("SELECT * FROM mapping_session");
  $sth1->execute;

  while (my @row = $sth1->fetchrow_array) {
    $i++;
    print $fh join("\t", @row);
    print $fh "\n";
  }

  $sth1->finish;

  # append the new mapping_session to the file
  my $release_sql = qq(
    SELECT meta_value FROM meta WHERE meta_key = 'schema_version'
  );
  my $old_release = $self->fetch_value_from_db($s_dbh, $release_sql);
  my $new_release = $self->fetch_value_from_db($t_dbh, $release_sql);

  my $assembly_sql = qq(
    SELECT meta_value FROM meta WHERE meta_key = 'assembly.default'
  );
  my $old_assembly = $self->fetch_value_from_db($s_dbh, $assembly_sql);
  my $new_assembly = $self->fetch_value_from_db($t_dbh, $assembly_sql);

  unless ($old_release and $new_release and $old_assembly and $new_assembly) {
    $self->logger->warning("Not all data for new mapping_session found:\n", 1);
    # each diagnostic goes on its own line, at the same indent level
    $self->logger->info("old_release: $old_release, new_release: $new_release\n", 2);
    $self->logger->info("old_assembly: $old_assembly, new_assembly: $new_assembly\n", 2);
  }

  print $fh join("\t",
                 $mapping_session_id,
                 $self->conf->param('sourcedbname'),
                 $self->conf->param('targetdbname'),
                 $old_release,
                 $new_release,
                 $old_assembly,
                 $new_assembly,
                 $self->mapping_session_date_fmt);

  print $fh "\n";

  # buffered write errors are only reported by close()
  close($fh) or throw("Unable to close mapping_session.txt: $!");

  # the count includes the newly appended session
  $self->logger->info("Done writing ".($i + 1)." mapping_session entries.\n\n");
}
00168 
00169 
# Assigns stable IDs to all target objects of one type and records the
# resulting stable_id_events.
#
# Arguments:
#   $mappings - Bio::EnsEMBL::IdMapping::MappingList for objects of $type
#   $type     - object type name; it is used to build the cache name
#               "${type}s_by_id" and the output file names
#
# For every target object:
#   * if the mapping list maps a source onto it, the target inherits the
#     source's stable ID and created_date, gets a version from the stable ID
#     generator's calculate_version(), and keeps the source's modified_date
#     unless the version changed;
#   * otherwise it gets the next new stable ID from the generator, version 1
#     and the mapping session date as created/modified date.
# For every source object without a mapping a deletion event is recorded.
# No stable_id_events are created for type 'exon'.
#
# Targets and sources are visited in ascending numeric order of their cache
# key, so the assignment of new stable IDs is reproducible between runs
# instead of depending on Perl's hash ordering.
#
# Finally the stable IDs and the mapping statistics are written to file.
# Returns early (after logging) if the source cache holds no objects.
# Throws on a missing type or an argument that is not a MappingList.
sub map_stable_ids {
  my $self = shift;
  my $mappings = shift;
  my $type = shift;

  # validate the type first: it is interpolated into messages and cache names
  throw("Need an object type (e.g. gene, transcript, exon).") unless ($type);

  unless ($mappings and
          $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of ${type}s.");
  }

  # generate a new mapping_session and write all mapping_session data to a file
  $self->generate_mapping_session;

  $self->logger->info("== Stable ID mapping for $type...\n\n", 0, 'stamped');

  # work on the cached hashes directly; copying them is unnecessary because
  # only the objects they point to are modified
  my $all_sources =
    $self->cache->get_by_name("${type}s_by_id", 'source') || {};
  my $all_targets =
    $self->cache->get_by_name("${type}s_by_id", 'target') || {};

  # check if there are any objects of this type at all
  unless (scalar(keys %$all_sources)) {
    $self->logger->info("No cached ${type}s found.\n\n");
    return;
  }

  my %stats = map { $_ => 0 }
    qw(mapped_known mapped_novel new lost_known lost_novel);

  # create some lookup hashes from the mappings
  my %sources_mapped = ();
  my %targets_mapped = ();
  my %scores_by_target = ();

  foreach my $e (@{ $mappings->get_all_Entries }) {
    $sources_mapped{$e->source} = $e->target;
    $targets_mapped{$e->target} = $e->source;
    $scores_by_target{$e->target} = $e->score;
  }

  # determine starting stable ID for new assignments
  my $new_stable_id = $self->stable_id_generator->initial_stable_id($type);

  #
  # assign mapped and new stable IDs
  #
  foreach my $tid (sort { $a <=> $b } keys %$all_targets) {

    my $t_obj = $all_targets->{$tid};

    # a mapping exists, assign stable ID accordingly
    if (exists $targets_mapped{$tid}) {

      my $sid = $targets_mapped{$tid};
      my $s_obj = $all_sources->{$sid};

      # set target's stable ID and created_date
      $t_obj->stable_id($s_obj->stable_id);
      $t_obj->created_date($s_obj->created_date);

      # calculate and set version
      $t_obj->version($self->stable_id_generator->calculate_version(
        $s_obj, $t_obj));

      # change modified_date if version changed
      if ($s_obj->version == $t_obj->version) {
        $t_obj->modified_date($s_obj->modified_date);
      } else {
        $t_obj->modified_date($self->mapping_session_date);
      }

      # create a stable_id_event entry (not for exons)
      unless ( $type eq 'exon' ) {
        # Only add events when something changed.
        if ( !( $s_obj->stable_id eq $t_obj->stable_id &&
                $s_obj->version == $t_obj->version &&
                $scores_by_target{$tid} == 1 ) )
        {
          my $key = join( "\t",
                          $s_obj->stable_id,         $s_obj->version,
                          $t_obj->stable_id,         $t_obj->version,
                          $self->mapping_session_id, $type,
                          $scores_by_target{$tid} );
          $self->add_stable_id_event( 'new', $key );
        }
      }

      # add to debug hash
      push @{ $debug_mappings{$type} }, [ $sid, $tid, $t_obj->stable_id ];

      # stats
      if ($s_obj->is_known) {
        $stats{'mapped_known'}++;
      } else {
        $stats{'mapped_novel'}++;
      }

    # no mapping was found, assign a new stable ID
    } else {

      $t_obj->stable_id($new_stable_id);
      $t_obj->version(1);
      $t_obj->created_date($self->mapping_session_date);
      $t_obj->modified_date($self->mapping_session_date);

      # create a stable_id_event entry (not for exons)
      unless ($type eq 'exon') {
        my $key = join("\t",
                       '\N',
                       0,
                       $t_obj->stable_id,
                       $t_obj->version,
                       $self->mapping_session_id,
                       $type,
                       0
        );
        $self->add_stable_id_event('new', $key);
      }

      # increment the stable Id (to be assigned to the next unmapped object)
      $new_stable_id = $self->stable_id_generator->increment_stable_id(
        $new_stable_id);

      # stats
      $stats{'new'}++;

    }

  }

  #
  # deletion events for lost sources
  #
  my $fh;
  if ($type eq 'gene' or $type eq 'transcript') {
    $fh = $self->get_filehandle("${type}s_lost.txt", 'debug');
  }

  foreach my $sid (sort { $a <=> $b } keys %$all_sources) {

    # only sources without a mapping are of interest here
    next if (exists $sources_mapped{$sid});

    my $s_obj = $all_sources->{$sid};

    # no mapping exists, add deletion event (not for exons)
    unless ($type eq 'exon') {
      my $key = join("\t",
                     $s_obj->stable_id,
                     $s_obj->version,
                     '\N',
                     0,
                     $self->mapping_session_id,
                     $type,
                     0
      );
      $self->add_stable_id_event('new', $key);
    }

    # stats
    my $status;
    if ($s_obj->is_known) {
      $stats{'lost_known'}++;
      $status = 'known';
    } else {
      $stats{'lost_novel'}++;
      $status = 'novel';
    }

    # log lost genes and transcripts (for debug purposes)
    #
    # The Java app did this with a separate method
    # (StableIdMapper.dumpLostGeneAndTranscripts()) which also claims to log
    # losses due to merge. Since at that point this data isn't available yet
    # the logging can be done much more efficiently here.
    if (defined($fh)) {
      print $fh $s_obj->stable_id, "\t$status\n";
    }
  }

  close($fh) if (defined($fh));

  #
  # write stable IDs to file
  #
  $self->write_stable_ids_to_file($type, $all_targets);

  # also generate and write stats to file
  $self->generate_mapping_stats($type, \%stats);

  $self->logger->info("Done.\n\n");
}
00357 
00358 
# Records 'similarity' stable_id_events for near-best scoring pairs.
#
# Arguments:
#   $mappings - Bio::EnsEMBL::IdMapping::MappingList of accepted mappings
#   $scores   - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix with all scores
#   $type     - object type name, used to build the cache name
#               "${type}s_by_id"
#
# Two passes are made:
#   1. For each accepted mapping, every other matrix entry sharing its source
#      or its target is recorded if its score is within 1.5% of the mapping's
#      score.
#   2. For each source and each target in the matrix that has no accepted
#      mapping, the entries within 5% of its top score are recorded, provided
#      that top score is at least 0.75.
#
# Pass 2 always covers both 'source' and 'target', even when the mapping list
# is empty (in which case every object in the matrix counts as unmapped).
#
# Throws on invalid arguments.
sub generate_similarity_events {
  my ( $self, $mappings, $scores, $type ) = @_;

  # argument checks
  unless ( $mappings and
           $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList') )
  {
    throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
  }

  unless ( $scores and
          $scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
  {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  throw("Need a type (gene|transcript|translation).") unless ($type);

  # lookup of mapped sources and targets; both keys always exist so that the
  # second pass runs for both sides regardless of the mapping list's content
  my %is_mapped = ( 'source' => {}, 'target' => {} );

  #
  # add similarities for mapped entries
  #
  foreach my $e ( @{ $mappings->get_all_Entries } ) {

    $is_mapped{'source'}->{ $e->source } = 1;
    $is_mapped{'target'}->{ $e->target } = 1;

    # other entries qualify if they score within 1.5% of this entry (which
    # is the top scorer)
    my $min_score = $e->score*0.985;

    # all other entries which contain either this target or this source
    my @others = ( @{ $scores->get_Entries_for_target( $e->target ) },
                   @{ $scores->get_Entries_for_source( $e->source ) } );

    foreach my $e2 (@others) {

      # skip self
      if ( ( $e->source eq $e2->source ) and
           ( $e->target eq $e2->target ) )
      {
        next;
      }

      if ( $e2->score > $min_score ) {
        $self->_add_similarity_event( $type, $e2 );
      }

      # [todo] add overlap hack here? (see Java code)
      # probably better solution: let synteny rescoring affect this
      # decision
    }

  } ## end foreach my $e ( @{ $mappings...})

  #
  # similarities for other entries
  #
  foreach my $dbtype (qw(source target)) {

    my $m1 = "get_all_${dbtype}s";
    my $m2 = "get_Entries_for_${dbtype}";

    foreach my $id ( @{ $scores->$m1 } ) {

      # skip if this is a mapped source/target
      if ( $is_mapped{$dbtype}->{$id} ) { next }

      my @entries =
        sort { $b->score <=> $a->score } @{ $scores->$m2($id) };

      unless (@entries) { next }

      # skip if top score < 0.75
      my $top_score = $entries[0]->score;
      if ( $top_score < 0.75 ) { next }

      # add similarities for all entries within 5% of top scorer; the list
      # is sorted by descending score, so the first miss ends the scan
      my $min_score = $top_score*0.95;

      foreach my $e (@entries) {
        last unless ( $e->score > $min_score );
        $self->_add_similarity_event( $type, $e );
      }

    } ## end foreach my $id ( @{ $scores...})
  } ## end foreach my $dbtype (qw(source target))

  return;
} ## end sub generate_similarity_events


# Builds the tab-separated stable_id_event row for one scoring matrix entry
# and registers it as a 'similarity' event.
#
# Arguments:
#   $type  - object type name (selects the "${type}s_by_id" cache)
#   $entry - matrix entry providing source, target and score
sub _add_similarity_event {
  my ( $self, $type, $entry ) = @_;

  my $s_obj =
    $self->cache->get_by_key( "${type}s_by_id", 'source', $entry->source );
  my $t_obj =
    $self->cache->get_by_key( "${type}s_by_id", 'target', $entry->target );

  my $key = join( "\t",
                  $s_obj->stable_id,         $s_obj->version,
                  $t_obj->stable_id,         $t_obj->version,
                  $self->mapping_session_id, $type,
                  $entry->score );

  $self->add_stable_id_event( 'similarity', $key );

  return;
}
00478 
00479 
# Returns a copy of a transcript scoring matrix without the entries whose
# source and target transcript belong to the same gene.
#
# An entry is dropped only if the genes of its source and target transcript
# share a stable ID AND the source transcript's stable ID is still used by
# some target transcript; entries for deleted transcripts are therefore kept.
#
# Arguments:
#   $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
# Returns  : a new ScoredMappingMatrix holding the surviving entries
# Throws   : if the argument is missing or of the wrong class
sub filter_same_gene_transcript_similarities {
  my ($self, $transcript_scores) = @_;

  # argument checks
  if (!$transcript_scores or
      !$transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of transcripts.');
  }

  # matrix that will receive the entries we keep
  my $kept_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH   => path_append($self->conf->param('basedir'), 'matrix'),
    -CACHE_FILE  => 'filtered_transcript_scores.ser',
  );

  # stable IDs of all target transcripts
  my %is_target_stable_id = map { $_->stable_id => 1 }
    values %{ $self->cache->get_by_name("transcripts_by_id", 'target') };

  my $skipped = 0;

  foreach my $entry (@{ $transcript_scores->get_all_Entries }) {

    my $s_tr = $self->cache->get_by_key('transcripts_by_id', 'source',
      $entry->source);
    my $s_gene = $self->cache->get_by_key('genes_by_transcript_id', 'source',
      $entry->source);
    my $t_gene = $self->cache->get_by_key('genes_by_transcript_id', 'target',
      $entry->target);
    # workaround for caching issue: only gene objects in 'genes_by_id' cache
    # have a stable ID assigned
    #$t_gene = $self->cache->get_by_key('genes_by_id', 'target', $t_gene->id);

    my $same_gene = ($s_gene->stable_id eq $t_gene->stable_id);

    if ($same_gene and $is_target_stable_id{$s_tr->stable_id}) {
      # same gene and the source transcript still exists: drop the entry
      $skipped++;
    } else {
      $kept_scores->add_Entry($entry);
    }
  }

  $self->logger->debug("Skipped $skipped same gene transcript mappings.\n");

  return $kept_scores;
}
00532 
00533 
# Generates similarity events for translations from transcript scores.
#
# A temporary translation scoring matrix is built in which every pair of
# translations gets the score of the transcript pair they belong to; pairs
# where either transcript has no translation are left out. The matrix is then
# handed to generate_similarity_events() with type 'translation'.
#
# Arguments:
#   $mappings          - Bio::EnsEMBL::IdMapping::MappingList
#   $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
# Throws   : if either argument is missing or of the wrong class
sub generate_translation_similarity_events {
  my ($self, $mappings, $transcript_scores) = @_;

  # argument checks
  if (!$mappings or
      !$mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
  }

  if (!$transcript_scores or
      !$transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  # stand-in scoring matrix for translations
  my $translation_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH   => path_append($self->conf->param('basedir'), 'matrix'),
    -CACHE_FILE  => 'translation_scores.ser',
  );

  foreach my $entry (@{ $transcript_scores->get_all_Entries }) {

    my $source_translation = $self->cache->get_by_key('transcripts_by_id',
      'source', $entry->source)->translation;
    my $target_translation = $self->cache->get_by_key('transcripts_by_id',
      'target', $entry->target)->translation;

    # both transcripts need a translation to contribute a score
    next unless ($source_translation and $target_translation);

    # reuse the score of the corresponding transcripts
    $translation_scores->add_score($source_translation->id,
      $target_translation->id, $entry->score);
  }

  # now generate similarity events using this stand-in scoring matrix
  $self->generate_similarity_events($mappings, $translation_scores,
    'translation');
}
00574 
00575 
# Writes the stable ID table dump for one object type.
#
# Arguments:
#   $type        - object type name; output goes to "${type}_stable_id.txt"
#   $all_targets - hashref of target objects keyed by a numeric ID
#
# One tab-separated row is written per object, in ascending numeric key
# order: id, stable_id, version, created date, modified date. Dates are
# formatted as "%Y-%m-%d %T" in local time; a missing (false) created or
# modified date is replaced by the mapping session date.
#
# Throws if the file cannot be closed cleanly, because buffered write errors
# only surface at close time and the dump would then be incomplete.
sub write_stable_ids_to_file {
  my $self = shift;
  my $type = shift;
  my $all_targets = shift;

  $self->logger->info("Writing ${type} stable IDs to file...\n");

  my $file = "${type}_stable_id.txt";
  my $fh = $self->get_filehandle($file, 'tables');

  # loop invariants
  my $date_fmt = "%Y-%m-%d %T";
  my $session_date = $self->mapping_session_date;

  my @sorted_targets = map { $all_targets->{$_} } sort { $a <=> $b }
    keys %$all_targets;

  foreach my $obj (@sorted_targets) {

    # fall back to the session date for missing created and modified dates
    my $created_date = $obj->created_date || $session_date;
    my $modified_date = $obj->modified_date || $session_date;

    my $row = join("\t",
                   $obj->id,
                   $obj->stable_id,
                   $obj->version,
                   strftime($date_fmt, localtime($created_date)),
                   strftime($date_fmt, localtime($modified_date)),
    );

    print $fh "$row\n";
  }

  # buffered write errors are only reported by close()
  close($fh) or throw("Unable to close $file: $!");

  $self->logger->info("Done writing ".scalar(@sorted_targets)." entries.\n\n");
}
00620 
00621 
# Formats the mapping statistics for one object type, logs them and writes
# them to "${type}_mapping_stats.txt".
#
# Arguments:
#   $type  - object type name
#   $stats - hashref with the counters mapped_known, mapped_novel,
#            lost_known and lost_novel
#
# The report has one row each for 'known' and 'novel' objects (omitted for
# type 'exon') and a 'total' row; each row shows the mapped count, the lost
# count and the mapped percentage. A percentage whose denominator is zero is
# reported as 0 rather than dividing by zero.
sub generate_mapping_stats {
  my $self = shift;
  my $type = shift;
  my $stats = shift;

  my $result = ucfirst($type)." mapping results:\n\n";

  my $fmt1 = "%-10s%-10s%-10s%-10s\n";
  my $fmt2 = "%-10s%6.0f    %6.0f    %4.2f%%\n";

  $result .= sprintf($fmt1, qw(TYPE MAPPED LOST PERCENTAGE));
  $result .= ('-'x40)."\n";

  my $mapped_total = $stats->{'mapped_known'} + $stats->{'mapped_novel'};
  my $lost_total = $stats->{'lost_known'} + $stats->{'lost_novel'};
  my $known_total = $stats->{'mapped_known'} + $stats->{'lost_known'};
  my $novel_total = $stats->{'mapped_novel'} + $stats->{'lost_novel'};
  my $grand_total = $known_total + $novel_total;

  # no split into known and novel for exons
  unless ( $type eq 'exon' ) {
    $result .= sprintf( $fmt2,
    'known',
    $stats->{'mapped_known'},
    $stats->{'lost_known'},
    ($known_total ? $stats->{'mapped_known'}/$known_total*100 : 0)
    );

    $result .= sprintf( $fmt2,
    'novel',
    $stats->{'mapped_novel'},
    $stats->{'lost_novel'},
    ($novel_total ? $stats->{'mapped_novel'}/$novel_total*100 : 0)
    );
  } ## end unless ( $type eq 'exon' )

  # guard the total like the rows above: nothing mapped and nothing lost
  # must not die with a division by zero
  $result .= sprintf($fmt2, 'total', $mapped_total, $lost_total,
    ($grand_total ? $mapped_total/$grand_total*100 : 0));

  # log result
  $self->logger->info($result."\n");

  # write result to file
  my $fh = $self->get_filehandle("${type}_mapping_stats.txt", 'stats');
  print $fh $result;
  close($fh)
    or $self->logger->warning("Unable to close ${type}_mapping_stats.txt: $!\n");
}
00668 
00669 
# Writes the collected debug mappings to one file per object type.
#
# For every type present in the file-scoped %debug_mappings hash, each stored
# row (an array reference) is written as a tab-separated line to
# "${type}_mappings.txt", opened via get_filehandle() with the 'debug'
# argument.
sub dump_debug_mappings {
  my ($self) = @_;

  for my $object_type (keys %debug_mappings) {

    $self->logger->debug("Writing $object_type mappings to debug/${object_type}_mappings.txt...\n");

    my $out = $self->get_filehandle("${object_type}_mappings.txt", 'debug');

    # one tab-separated line per recorded mapping
    for my $fields (@{ $debug_mappings{$object_type} }) {
      print $out join("\t", @{$fields});
      print $out "\n";
    }

    close($out);

    $self->logger->debug("Done.\n");
  }
}
00689 
00690 
# Writes all stable_id_events of one event type to a table dump file.
#
# Arguments:
#   $event_type - event type name; the events are fetched with
#                 get_all_stable_id_events($event_type) and written, one per
#                 line, to "stable_id_event_${event_type}.txt"
#
# Throws if no event type is given, or if the file cannot be closed cleanly
# (buffered write errors only surface at close time and the dump would then
# be incomplete).
sub write_stable_id_events {
  my $self = shift;
  my $event_type = shift;

  throw("Need an event type (new|similarity).") unless ($event_type);

  $self->logger->debug("Writing $event_type stable_id_events to file...\n");

  my $file = "stable_id_event_${event_type}.txt";
  my $fh = $self->get_filehandle($file, 'tables');
  my $i = 0;

  foreach my $event (@{ $self->get_all_stable_id_events($event_type) }) {
    print $fh "$event\n";
    $i++;
  }

  # buffered write errors are only reported by close()
  close($fh) or throw("Unable to close $file: $!");

  $self->logger->debug("Done writing $i entries.\n");
}
00711 
00712 
# Registers a stable_id_event row under the given event type.
#
# Events are stored as hash keys, so adding the same row twice for the same
# type keeps a single copy.
#
# Arguments:
#   $type  - event type name
#   $event - the event row as a string
# Throws   : if no event type is given
sub add_stable_id_event {
  my $self  = shift;
  my $type  = shift;
  my $event = shift;

  # argument check
  unless ($type) {
    throw("Need an event type (new|similarity).");
  }

  my $events_of_type = ($self->{'stable_id_events'}->{$type} ||= {});
  $events_of_type->{$event} = 1;
}
00721 
00722 
# Returns all stable_id_event rows registered for one event type.
#
# Arguments:
#   $type - event type name
# Returns  : array reference of event strings, in no particular order; an
#            empty array reference if nothing was registered for that type
# Throws   : if no event type is given
sub get_all_stable_id_events {
  my $self = shift;
  my $type = shift;

  # argument check
  throw("Need an event type (new|similarity).") if (!$type);

  my @events = keys %{ $self->{'stable_id_events'}->{$type} };

  return \@events;
}
00731 
00732 
# Getter/setter for the mapping session ID.
#
# Arguments: optional new value; any argument, including undef, is stored
# Returns  : the current value
sub mapping_session_id {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_id'} = $new_value[0];
  }

  return $self->{'_mapping_session_id'};
}
00738 
00739 
# Getter/setter for the mapping session date.
#
# Arguments: optional new value; any argument, including undef, is stored
# Returns  : the current value
sub mapping_session_date {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_date'} = $new_value[0];
  }

  return $self->{'_mapping_session_date'};
}
00745 
00746 
# Getter/setter for the formatted mapping session date string.
#
# Arguments: optional new value; any argument, including undef, is stored
# Returns  : the current value
sub mapping_session_date_fmt {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_date_fmt'} = $new_value[0];
  }

  return $self->{'_mapping_session_date_fmt'};
}
00752 
00753 
# Getter/setter for the stable ID generator object.
#
# Arguments: optional new value; any argument, including undef, is stored
# Returns  : the current value
sub stable_id_generator {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_stable_id_generator'} = $new_value[0];
  }

  return $self->{'_stable_id_generator'};
}
00759 
00760 
00761 1;
00762