Archive Ensembl HomeArchive Ensembl Home
GenomeDB.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =head1 NAME
00020 
00021 Bio::EnsEMBL::Compara::GenomeDB - Describes a genome (species, assembly, genebuild) and the location of its core database
00022 
00023 =head1 SYNOPSIS
00024   use Bio::EnsEMBL::Compara::GenomeDB;
00025   my $genome_db = Bio::EnsEMBL::Compara::GenomeDB->new();
00026 
00027 SET VALUES
00028   $genome_db->dbID(22);
00029   $genome_db->db_adaptor($dba);
00030   $genome_db->name("Homo sapiens");
00031   $genome_db->assembly("NCBI36");
00032   $genome_db->taxon_id(9606);
00033   $genome_db->taxon($taxon);
00034   $genome_db->genebuild("2006-12-Ensembl");
00035   $genome_db->assembly_default(1);
00036   $genome_db->locator("Bio::EnsEMBL::DBSQL::DBAdaptor/host=???;port=???;user=???;dbname=homo_sapiens_core_51_36m;species=Homo sapiens;disconnect_when_inactive=1");
00037 
00038 GET VALUES
00039   $dbID = $genome_db->dbID;
00040   $genome_db_adaptor = $genome_db->adaptor;
00041   $name = $genome_db->name;
00042   $assembly = $genome_db->assembly;
00043   $taxon_id = $genome_db->taxon_id;
00044   $taxon = $genome_db->taxon;
00045   $genebuild = $genome_db->genebuild;
00046   $assembly_default = $genome_db->assembly_default;
00047   $locator = $genome_db->locator;
00048 
00049 
00050 =head1 DESCRIPTION
00051 
00052 The GenomeDB object stores information about each species including the taxon_id, species name, assembly, genebuild and the location of the core database.
00053 
00054 =head1 APPENDIX
00055 
00056 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
00057 
00058 =cut
00059 
00060 
00061 # Let the code begin...
00062 
00063 
00064 package Bio::EnsEMBL::Compara::GenomeDB;
00065 
00066 use strict;
00067 
00068 use Bio::EnsEMBL::Utils::Exception qw(warning deprecate throw);
00069 use Bio::EnsEMBL::DBLoader;
00070 
00071 =head2 new
00072 
00073   Example :
00074     my $genome_db = Bio::EnsEMBL::Compara::GenomeDB->new();
00075     $genome_db->db_adaptor($dba);
00076     $genome_db->name("Homo sapiens");
00077     $genome_db->assembly("NCBI36");
00078     $genome_db->taxon_id(9606);
00079     $genome_db->dbID(22);
00080     $genome_db->genebuild("2006-12-Ensembl");
00081 
00082   Description: Creates a new GenomeDB object
00083   Returntype : Bio::EnsEMBL::Compara::GenomeDB
00084   Exceptions : none
00085   Caller     : general
00086   Status     : Stable
00087 
00088 =cut
00089 
sub new {
  # Positional constructor. After the invocant the arguments are, in order:
  # dba, name, assembly, taxon_id, dbID, genebuild. All of them are optional;
  # an argument that evaluates to false is skipped and the matching setter
  # is never called for it.
  my ($caller, @values) = @_;

  # Accept both a class name and an existing object as the invocant.
  my $self = bless {}, (ref($caller) || $caller);

  # Setter names listed in the same order as the positional arguments.
  my @setters = qw(db_adaptor name assembly taxon_id dbID genebuild);

  for my $index (0 .. $#setters) {
    my $value = $values[$index];
    next unless $value;

    my $setter = $setters[$index];
    $self->$setter($value);
  }

  return $self;
}
00105 
00106 =head2 new_fast
00107 
00108   Arg [1]    : hash reference $hashref
00109   Example    : 
00110   Description: This is an ultra fast constructor which requires knowledge of
00111                the objects internals to be used.
00112   Returntype : Bio::EnsEMBL::Compara::GenomeDB
00113   Exceptions : none
00114   Caller     : Bio::EnsEMBL::Compara::DBSQL::GenomeDBAdaptor
00115   Status     : Stable
00116 
00117 =cut
00118 
sub new_fast {
  # Blesses the supplied hash reference as-is: no copy is made and no
  # setter is run, so the caller's hash becomes the object itself.
  my ($class, $hashref) = @_;

  return bless($hashref, $class);
}
00125 
00126 
00127 =head2 db_adaptor
00128 
00129   Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
00130                The DBAdaptor containing sequence information for the genome
00131                represented by this object.
00132   Example    : $gdb->db_adaptor($dba);
00133   Description: Getter/Setter for the DBAdaptor containing sequence 
00134                information for the genome represented by this object.
00135   Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor
00136   Exceptions : none (a non-DBAdaptor argument is caught internally and
00137                the adaptor is re-created from the locator instead)
00138   Caller     : general
00139   Status     : Stable
00140 
00141 =cut
00142 
sub db_adaptor {
  # Getter/setter for the core DBAdaptor of this genome.
  # A true $dba that is a Bio::EnsEMBL::DBSQL::DBAdaptor is stored. Any
  # failure while validating it (including the throw below) is trapped by
  # the eval, in which case the stored adaptor is discarded. Whenever no
  # adaptor is stored, one is requested from connect_to_genome_locator.
  my ($self, $dba) = @_;

  eval {
    # "return" only leaves the eval block: nothing to validate or store.
    return if !$dba;

    $dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')
      or throw("dba arg must be a Bio::EnsEMBL::DBSQL::DBAdaptor not a [$dba]\n");

    $self->{'_db_adaptor'} = $dba;
  };

  # A failed validation forces the adaptor to be rebuilt below.
  if ($@) {
    $self->{'_db_adaptor'} = undef;
  }

  if (!defined $self->{'_db_adaptor'}) {
    $self->{'_db_adaptor'} = $self->connect_to_genome_locator;
  }

  return $self->{'_db_adaptor'};
}
00163 
00164 
00165 
00166 =head2 name
00167 
00168   Arg [1]    : (optional) string $value
00169   Example    : $gdb->name('Homo sapiens');
00170   Description: Getter setter for the name of this genome database, usually
00171                just the species name.
00172   Returntype : string
00173   Exceptions : none
00174   Caller     : general
00175   Status     : Stable
00176 
00177 =cut
00178 
sub name {
  # Getter/setter for the genome name; only a defined argument overwrites
  # the stored value.
  my $self = shift;
  my ($value) = @_;

  $self->{'name'} = $value if defined $value;

  return $self->{'name'};
}
00187 
00188 
00189 =head2 short_name
00190 
00191   Example    : $gdb->short_name;
00192   Description: The name of this genome in the Gspe ('G'enera
00193                'spe'cies) format. Can also handle 'G'enera 's'pecies
00194                's'ub 's'pecies (Gsss)
00195   Returntype : string
00196   Exceptions : none
00197   Caller     : general
00198   Status     : Stable
00199 
00200 =cut
00201 
sub short_name {
  # Builds a four-character abbreviation of the genome name.
  my ($self) = @_;
  my $label = $self->name;

  # Upper-case the first character of every word, then turn underscores
  # into spaces. The order matters: "_" is a word character, so letters
  # that follow an underscore are not upper-cased.
  $label =~ s/\b(\w)/\U$1/g;
  $label =~ s/\_/\ /g;

  # Four or more words: first character of each of the first four words.
  return $label if $label =~ s/(\S)\S*\s(\S)\S*\s(\S)\S*\s(\S).*/$1$2$3$4/;

  # Three words: first character of the first two, two of the third.
  return $label if $label =~ s/(\S)\S*\s(\S)\S*\s(\S{2,2}).*/$1$2$3/;

  # Two words: first character of the first, three of the second.
  return $label if $label =~ s/(\S)\S*\s(\S{3,3}).*/$1$2/;

  # Anything else: the first four characters.
  return substr($label, 0, 4);
}
00216 
00217 =head2 get_short_name
00218 
00219   Example    : $gdb->get_short_name;
00220   Description: The name of this genome in the Gspe ('G'enera
00221                'spe'cies) format. Can also handle 'G'enera 's'pecies
00222                's'ub 's'pecies (Gsss)
00223   Returntype : string
00224   Exceptions : none
00225   Caller     : general
00226   Status     : Stable
00227 
00228 =cut
00229 
sub get_short_name {
  # Alias that simply delegates to short_name.
  my ($self) = @_;

  return $self->short_name;
}
00234 
00235 
00236 =head2 dbID
00237 
00238   Arg [1]    : (optional) int $value the new value of this objects database 
00239                identifier
00240   Example    : $dbID = $genome_db->dbID;
00241   Description: Getter/Setter for the internal identifier of this GenomeDB
00242   Returntype : int
00243   Exceptions : none
00244   Caller     : general
00245   Status     : Stable
00246 
00247 =cut
00248 
sub dbID {
  # Getter/setter for the internal identifier; only a defined argument
  # replaces the stored value.
  my ($self, $new_id) = @_;

  # Plain getter call.
  return $self->{'dbID'} unless defined $new_id;

  $self->{'dbID'} = $new_id;
  return $new_id;
}
00256 
00257 
00258 =head2 adaptor
00259 
00260   Arg [1]    : (optional) Bio::EnsEMBL::Compara::GenomeDBAdaptor $adaptor
00261   Example    : $adaptor = $GenomeDB->adaptor();
00262   Description: Getter/Setter for the GenomeDB object adaptor used
00263                by this GenomeDB for database interaction.
00264   Returntype : Bio::EnsEMBL::Compara::GenomeDBAdaptor
00265   Exceptions : none
00266   Caller     : general
00267   Status     : Stable
00268 
00269 =cut
00270 
sub adaptor {
  # Getter/setter for the object adaptor; only a defined argument replaces
  # the stored one.
  my $self = shift;
  my ($new_adaptor) = @_;

  if (defined $new_adaptor) {
    $self->{'adaptor'} = $new_adaptor;
  }

  return $self->{'adaptor'};
}
00278 
00279 
00280 =head2 assembly
00281 
00282   Arg [1]    : (optional) string
00283   Example    : $gdb->assembly('NCBI36');
00284   Description: Getter/Setter for the assembly type of this genome db.
00285   Returntype : string
00286   Exceptions : none
00287   Caller     : general
00288   Status     : Stable
00289 
00290 =cut
00291 
sub assembly {
  # Getter/setter for the assembly name of this genome db.
  #   Arg [1] : (optional) string; stored when it is defined.
  #   Returns : the stored assembly, or undef when none has been set.
  my ($self, $assembly) = @_;

  # Test definedness rather than truth, like the other setters of this
  # class do, so that a defined-but-false value such as '0' is stored
  # instead of being silently ignored.
  if (defined $assembly) {
    $self->{'assembly'} = $assembly;
  }

  return $self->{'assembly'};
}
00301 
00302 =head2 assembly_default
00303 
00304   Arg [1]    : (optional) int
00305   Example    : $gdb->assembly_default(1);
00306   Description: Getter/Setter for the assembly_default of this genome db.
00307   Returntype : int
00308   Exceptions : none
00309   Caller     : general
00310   Status     : Stable
00311 
00312 =cut
00313 
sub assembly_default {
  # Getter/setter for the assembly_default flag. A defined argument is
  # stored; if nothing is stored afterwards the flag is set to '1'.
  my ($self, $flag) = @_;

  $self->{'assembly_default'} = $flag if defined $flag;

  # The fallback is written back into the object, not just returned.
  if (!defined $self->{'assembly_default'}) {
    $self->{'assembly_default'} = '1';
  }

  return $self->{'assembly_default'};
}
00324 
00325 =head2 genebuild
00326 
00327   Arg [1]    : (optional) string
00328   Example    : $gdb->genebuild('2006-12-Ensembl');
00329   Description: Getter/Setter for the genebuild type of this genome db.
00330   Returntype : string
00331   Exceptions : none
00332   Caller     : general
00333   Status     : Stable
00334 
00335 =cut
00336 
sub genebuild {
  # Getter/setter for the genebuild string. Any passed argument, even
  # undef, replaces the stored value; an undefined stored value is then
  # normalised to the empty string.
  my ($self, @args) = @_;

  if (@args) {
    $self->{'genebuild'} = $args[0];
  }

  if (!defined $self->{'genebuild'}) {
    $self->{'genebuild'} = '';
  }

  return $self->{'genebuild'};
}
00343 
00344 
00345 =head2 taxon_id
00346 
00347   Arg [1]    : (optional) int
00348   Example    : $gdb->taxon_id(9606);
00349   Description: Getter/Setter for the taxon id of the contained genome db
00350   Returntype : int
00351   Exceptions : none
00352   Caller     : general
00353   Status     : Stable
00354 
00355 =cut
00356 
sub taxon_id {
  # Getter/setter for the taxon id; only a defined argument replaces the
  # stored value.
  my ($self, $taxon_id) = @_;

  $self->{'taxon_id'} = $taxon_id if defined $taxon_id;

  return $self->{'taxon_id'};
}
00366 
00367 =head2 taxon
00368 
00369   Description: uses taxon_id to fetch the NCBITaxon object
00370   Returntype : Bio::EnsEMBL::Compara::NCBITaxon object 
00371   Exceptions : if taxon_id or adaptor not defined
00372   Caller     : general
00373   Status     : Stable
00374 
00375 =cut
00376 
sub taxon {
  # Returns the taxon node for this genome's taxon_id and caches it in
  # the object. Arguments other than the invocant are ignored.
  my ($self) = @_;

  # Serve the cached node when there is one.
  my $cached = $self->{'_taxon'};
  return $cached if defined $cached;

  # Both a taxon_id and an adaptor are needed to fetch the node.
  if (!(defined $self->taxon_id and $self->adaptor)) {
    throw("can't fetch Taxon without a taxon_id and an adaptor");
  }

  my $taxon = $self->adaptor->db->get_NCBITaxonAdaptor
                   ->fetch_node_by_taxon_id($self->{'taxon_id'});

  $self->{'_taxon'} = $taxon;
  return $taxon;
}
00389 
00390 
00391 =head2 locator
00392 
00393   Arg [1]    : (optional) string
00394   Description: Getter/Setter for the string which describes where the external
00395                genome (ensembl core) database is located. Locator format is:
00396                "Bio::EnsEMBL::DBSQL::DBAdaptor/host=ecs4;port=3351;user=ensro;dbname=mus_musculus_core_20_32"
00397   Returntype : string
00398   Exceptions : none
00399   Caller     : general
00400   Status     : Stable
00401 
00402 =cut
00403 
sub locator {
  # Getter/setter for the locator string. Any passed argument, even
  # undef, replaces the stored value; an undefined stored value is then
  # normalised to the empty string.
  my $self = shift;

  ($self->{'locator'}) = @_ if @_;

  $self->{'locator'} = '' if !defined $self->{'locator'};

  return $self->{'locator'};
}
00410 
00411 =head2 connect_to_genome_locator
00412 
00413   Arg [1]    : none
00414   Description: uses the locator string to connect to the external genome database
00415   Returntype : DBConnection/DBAdaptor defined in locator string
00416               (usually a Bio::EnsEMBL::DBSQL::DBAdaptor)
00417               return undef if locator undefined or unable to connect
00418   Exceptions : none
00419   Caller     : internal private method 
00420   Status     : Stable
00421 
00422 =cut
00423 
sub connect_to_genome_locator {
  # Builds a database adaptor from this object's locator string via
  # Bio::EnsEMBL::DBLoader. Returns undef when the locator is empty, when
  # the loader dies (the error is trapped by eval) or when it returns a
  # false value.
  my ($self) = @_;

  return undef if $self->locator eq '';

  # eval yields undef if the loader dies, so a failure is not propagated.
  my $genomeDBA = eval { Bio::EnsEMBL::DBLoader->new($self->locator) };

  return $genomeDBA ? $genomeDBA : undef;
}
00435 
00436 
00437 1;