Archive Ensembl HomeArchive Ensembl Home
GeneSet.pm
Go to the documentation of this file.
00001 #
00002 # You may distribute this module under the same terms as perl itself
00003 #
00004 # POD documentation - main docs before the code
00005 
00006 =pod 
00007 
00008 =head1 NAME
00009 
00010 Bio::EnsEMBL::Compara::Production::GeneSet
00011 
00012 =cut
00013 
00014 =head1 SYNOPSIS
00015 
00016 An abstract data class for holding an arbitrary collection of
00017 (ENSEMBLGENE)Member objects and providing set operations and 
00018 cross-reference operations to compare to another GeneSet object.
00019 Also used by HomologySet.
00020 
00021 =cut
00022 
00023 =head1 DESCRIPTION
00024 
00025 A 'set' object of Gene objects.  Uses Member::stable_id to identify unique genes.  
00026 Is used for comparing GeneSet objects with each other and building comparison
00027 matrixes.
00028 
00029 Not really a production object, but more an abstract data type for use by
00030 post analysis scripts.  Placed in Production since I could not think of a better location.
00031 The design of this object essentially was within the homology_diff.pl script
00032 but has now been formalized into a proper object design.
00033 
00034 =cut
00035 
00036 =head1 CONTACT
00037 
00038   Contact Jessica Severin on module implementation/design detail: jessica@ebi.ac.uk
00039   Contact Abel Ureta-Vidal on EnsEMBL/Compara: abel@ebi.ac.uk
00040   Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
00041 
00042 =cut
00043 
00044 =head1 APPENDIX
00045 
00046 The rest of the documentation details each of the object methods. 
00047 Internal methods are usually preceded with a _
00048 
00049 =cut
00050 
00051 
package Bio::EnsEMBL::Compara::Production::GeneSet;

use strict;
use warnings;    # surface undefined-value and similar mistakes at run time

use Bio::EnsEMBL::Compara::Member;
use Bio::EnsEMBL::Compara::Graph::CGObject;

# GeneSet is a CGObject subclass; init() and dealloc() below chain to the
# parent implementation through SUPER::.
our @ISA = qw(Bio::EnsEMBL::Compara::Graph::CGObject);
00059 
00060 
# Initialise the object: run the superclass initialiser first, then start
# from an empty gene collection by calling clear().
# Returns the object itself.
sub init {
  my ($self) = @_;

  $self->SUPER::init;
  $self->clear;

  return $self;
}
00067 
# Tear-down hook: delegates entirely to the superclass dealloc and hands its
# return value straight back to the caller.
sub dealloc {
  my ($self) = @_;

  return $self->SUPER::dealloc();
}
00072 
00073 
# Empty the set by replacing the internal stable_id => gene hash
# ('gene_hash') with a brand-new empty hash.
sub clear {
  my ($self) = @_;

  $self->{'gene_hash'} = {};
}
00079 
00080 
# Add one or more gene objects to the set.
#   Args    : list of objects that respond to ->stable_id
#   Returns : the set itself, so calls can be chained
# Genes are stored keyed by their stable_id.  When an id is already present
# the incoming object is ignored, so the first object stored under an id wins.
sub add {
  my ($self, @gene_list) = @_;

  for my $gene (@gene_list) {
    my $id = $gene->stable_id;
    unless (defined $self->{'gene_hash'}{$id}) {
      $self->{'gene_hash'}{$id} = $gene;
    }
  }

  return $self;
}
00091 
00092 
# Fold every gene of another set into this one.
#   Arg     : $other_set - object whose ->list returns an array reference
#   Returns : this set
# The work is delegated to add(), so add()'s handling of already-present
# stable_ids applies here as well.
sub merge {
  my ($self, $other_set) = @_;

  my $incoming = $other_set->list;
  $self->add(@{$incoming});

  return $self;
}
00100 
00101 
00102 ### gene ###
00103 
# Number of unique genes currently held in the set.
#   Returns : integer count of entries in the internal gene hash
# The keys of the hash are counted directly; there is no need to build a
# temporary array of every gene object just to measure its length.
sub size {
  my ($self) = @_;

  return scalar(keys %{ $self->{'gene_hash'} });
}
00108 
# Return a reference to a newly built array containing every gene object in
# the set.  The array is a fresh copy on each call; element order is whatever
# order values() yields for the internal hash.
sub list {
  my ($self) = @_;

  return [ values %{ $self->{'gene_hash'} } ];
}
00114 
# Membership test.
#   Arg     : $gene - object that responds to ->stable_id
#   Returns : 1 when a gene with the same stable_id is stored in the set,
#             0 otherwise
sub includes {
  my ($self, $gene) = @_;

  return defined($self->{'gene_hash'}{ $gene->stable_id }) ? 1 : 0;
}
00121 
# Look up the stored gene that shares a stable_id with the given gene.
#   Arg     : $gene - object that responds to ->stable_id
#   Returns : the gene object held in the set under that stable_id, or undef
#             when the set has no entry for it
sub find_gene_like {
  my ($self, $gene) = @_;

  my $id = $gene->stable_id;
  return $self->{'gene_hash'}{$id};
}
00127 
00128 
00129 ### debug printing ###
00130 
# Debug helper: writes "<N> unique genes" followed by a newline to the
# currently selected output handle, where N is the value of size().
sub print_stats {
  my ($self) = @_;

  my $count = $self->size;
  printf("%d unique genes\n", $count);
}
00136 
00137 
# Partition the genes of this set by genome.
#   Returns : hash reference mapping each gene's genome_db_id to a new
#             GeneSet that holds the genes carrying that id
# Genes must respond to ->genome_db_id (and to ->stable_id, which add() uses).
sub hashref_by_genome {
  my ($self) = @_;

  my %set_for_genome;
  foreach my $gene (@{ $self->list }) {
    # Fetch the id once per gene rather than on every hash access.
    my $genome_db_id = $gene->genome_db_id;

    # Direct method-call syntax; the indirect-object form ("new Class") is
    # ambiguous to the parser and discouraged by perlobj.
    unless (defined $set_for_genome{$genome_db_id}) {
      $set_for_genome{$genome_db_id} =
        Bio::EnsEMBL::Compara::Production::GeneSet->new;
    }
    $set_for_genome{$genome_db_id}->add($gene);
  }

  return \%set_for_genome;
}
00150 
00151 
00152 ############################################
00153 #
00154 # set theory operations
00155 #
00156 ############################################
00157 
# Relative complement of this set in another set.
#   Arg     : $other_set - object whose ->list returns an array reference of
#             gene objects
#   Returns : a new GeneSet holding the genes of $other_set that are NOT
#             members of this set (membership is decided by includes())
# Neither input set is modified.
sub relative_complement {
  my ($self, $other_set) = @_;

  # Direct method-call syntax; the indirect-object form ("new Class") is
  # ambiguous to the parser and discouraged by perlobj.
  my $new_set = Bio::EnsEMBL::Compara::Production::GeneSet->new;

  foreach my $gene (@{ $other_set->list }) {
    $new_set->add($gene) unless $self->includes($gene);
  }

  return $new_set;
}
00171 
00172 
# Intersection of this set with another set.
#   Arg     : $other_set - object providing ->includes($gene)
#   Returns : a new GeneSet holding the genes of THIS set for which
#             $other_set->includes() is true
# The gene objects placed in the result are the ones stored in this set.
# Neither input set is modified.
sub intersection {
  my ($self, $other_set) = @_;

  # Direct method-call syntax; the indirect-object form ("new Class") is
  # ambiguous to the parser and discouraged by perlobj.
  my $new_set = Bio::EnsEMBL::Compara::Production::GeneSet->new;

  foreach my $gene (@{ $self->list }) {
    $new_set->add($gene) if $other_set->includes($gene);
  }

  return $new_set;
}
00185 
00186 
00187 1;