Archive Ensembl HomeArchive Ensembl Home
GeneAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and
00024 storage of Gene objects
00025 
00026 =head1 SYNOPSIS
00027 
00028   use Bio::EnsEMBL::Registry;
00029 
00030   Bio::EnsEMBL::Registry->load_registry_from_db(
00031     -host => 'ensembldb.ensembl.org',
00032     -user => 'anonymous',
00033   );
00034 
00035   $gene_adaptor =
00036     Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "gene" );
00037 
00038   $gene = $gene_adaptor->fetch_by_dbID(1234);
00039 
00040   $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000184129');
00041 
00042   @genes = @{ $gene_adaptor->fetch_all_by_external_name('BRCA2') };
00043 
00044   $slice_adaptor =
00045     Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
00046 
00047   $slice =
00048     $slice_adaptor->fetch_by_region( 'chromosome', '1', 1, 1000000 );
00049 
00050   @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
00051 
00052 =head1 DESCRIPTION
00053 
00054 This is a database aware adaptor for the retrieval and storage of gene
00055 objects.
00056 
00057 =head1 METHODS
00058 
00059 =cut
00060 
00061 package Bio::EnsEMBL::DBSQL::GeneAdaptor;
00062 
00063 use strict;
00064 
00065 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
00066 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
00067 use Bio::EnsEMBL::DBSQL::SliceAdaptor;
00068 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
00069 use Bio::EnsEMBL::DBSQL::DBAdaptor;
00070 use Bio::EnsEMBL::Gene;
00071 
00072 use vars '@ISA';
00073 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
00074 
00075 
00076 # _tables
00077 #  Arg [1]    : none
00078 #  Description: PROTECTED implementation of superclass abstract method.
00079 #               Returns the names, aliases of the tables to use for queries.
00080 #  Returntype : list of listrefs of strings
00081 #  Exceptions : none
00082 #  Caller     : internal
00083 #  Status     : Stable
00084 
00085 sub _tables {
00086   return (
00087     [ 'gene',           'g' ],
00088     [ 'xref',           'x' ],
00089     [ 'external_db',    'exdb' ] );
00090 }
00091 
00092 
00093 # _columns
00094 #  Arg [1]    : none
00095 #  Example    : none
00096 #  Description: PROTECTED implementation of superclass abstract method.
00097 #               Returns a list of columns to use for queries.
00098 #  Returntype : list of strings
00099 #  Exceptions : none
00100 #  Caller     : internal
00101 #  Status     : Stable
00102 
00103 sub _columns {
00104   my ($self) = @_;
00105 
00106   my $created_date =
00107     $self->db()->dbc()->from_date_to_seconds("g.created_date");
00108   my $modified_date =
00109     $self->db()->dbc()->from_date_to_seconds("g.modified_date");
00110 
00111   return (
00112     'g.gene_id',                 'g.seq_region_id',
00113     'g.seq_region_start',        'g.seq_region_end',
00114     'g.seq_region_strand',       'g.analysis_id',
00115     'g.biotype',                 'g.display_xref_id',
00116     'g.description',             'g.status',
00117     'g.source',                  'g.is_current',
00118     'g.canonical_transcript_id', 'g.canonical_annotation',
00119     'g.stable_id',               'g.version',
00120     $created_date,               $modified_date,
00121     'x.display_label',           'x.dbprimary_acc',
00122     'x.description',             'x.version',
00123     'exdb.db_name',              'exdb.status',
00124     'exdb.db_release',           'exdb.db_display_name',
00125     'x.info_type',               'x.info_text'
00126   );
00127 } ## end sub _columns
00128 
00129 
00130 sub _left_join {
00131   return (
00132     [ 'xref',           "x.xref_id = g.display_xref_id" ],
00133     [ 'external_db',    "exdb.external_db_id = x.external_db_id" ] );
00134 }
00135 
00136 
00137 =head2 list_dbIDs
00138 
00139   Example    : @gene_ids = @{$gene_adaptor->list_dbIDs()};
00140   Description: Gets an array of internal ids for all genes in the current db
00141   Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
00142   Returntype : Listref of Ints
00143   Exceptions : none
00144   Caller     : general
00145   Status     : Stable
00146 
00147 =cut
00148 
00149 sub list_dbIDs {
00150   my ($self,$ordered) = @_;
00151 
00152   return $self->_list_dbIDs("gene",undef, $ordered);
00153 }
00154 
00155 
00156 =head2 list_stable_ids
00157 
00158   Example    : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
00159   Description: Gets an listref of stable ids for all genes in the current db
00160   Returntype : reference to a list of strings
00161   Exceptions : none
00162   Caller     : general
00163   Status     : Stable
00164 
00165 =cut
00166 
00167 sub list_stable_ids {
00168    my ($self) = @_;
00169 
00170    return $self->_list_dbIDs("gene", "stable_id");
00171 }
00172 
00173 
00174 sub list_seq_region_ids {
00175   my $self = shift;
00176 
00177   return $self->_list_seq_region_ids('gene');
00178 }
00179 
00180 =head2 fetch_by_display_label
00181 
00182   Arg [1]    : String $label - display label of gene to fetch
00183   Example    : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2");
00184   Description: Returns the gene which has the given display label or undef if
00185                there is none. If there are more than 1, only the first is
00186                reported.
00187   Returntype : Bio::EnsEMBL::Gene
00188   Exceptions : none
00189   Caller     : general
00190   Status     : Stable
00191 
00192 =cut
00193 
00194 sub fetch_by_display_label {
00195   my $self = shift;
00196   my $label = shift;
00197 
00198   my $constraint = "x.display_label = ? AND g.is_current = 1";
00199   $self->bind_param_generic_fetch($label,SQL_VARCHAR);
00200   my ($gene) = @{ $self->generic_fetch($constraint) };
00201 
00202   return $gene;
00203 }
00204 
00205 
00206 
00207 =head2 fetch_by_stable_id
00208 
00209   Arg [1]    : String $id 
00210                The stable ID of the gene to retrieve
00211   Example    : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944');
00212   Description: Retrieves a gene object from the database via its stable id.
00213                The gene will be retrieved in its native coordinate system (i.e.
00214                in the coordinate system it is stored in the database). It may
00215                be converted to a different coordinate system through a call to
00216                transform() or transfer(). If the gene or exon is not found
00217                undef is returned instead.
00218   Returntype : Bio::EnsEMBL::Gene or undef
00219   Exceptions : if we cant get the gene in given coord system
00220   Caller     : general
00221   Status     : Stable
00222 
00223 =cut
00224 
00225 sub fetch_by_stable_id {
00226   my ($self, $stable_id) = @_;
00227 
00228   my $constraint = "g.stable_id = ? AND g.is_current = 1";
00229   $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
00230   my ($gene) = @{ $self->generic_fetch($constraint) };
00231 
00232   return $gene;
00233 }
00234 
00235 
00236 
00237 =head2 fetch_all_by_biotype 
00238 
00239   Arg [1]    : String $biotype 
00240                listref of $biotypes
00241                The biotype of the gene to retrieve. You can have as an argument a reference
00242                to a list of biotypes
00243   Example    : $gene = $gene_adaptor->fetch_all_by_biotype('protein_coding'); 
00244                $gene = $gene_adaptor->fetch_all_by_biotypes(['protein_coding', 'sRNA', 'miRNA']);
00245   Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes.
00246                The genes will be retrieved in its native coordinate system (i.e.
00247                in the coordinate system it is stored in the database). It may
00248                be converted to a different coordinate system through a call to
00249                transform() or transfer(). If the gene or exon is not found
00250                undef is returned instead.
00251   Returntype  : listref of Bio::EnsEMBL::Gene
00252   Exceptions : if we cant get the gene in given coord system
00253   Caller     : general
00254   Status     : Stable
00255 
00256 =cut
00257 
00258 sub fetch_all_by_biotype {
00259   my ($self, $biotype) = @_;
00260 
00261   if (!defined $biotype){
00262       throw("Biotype or listref of biotypes expected");
00263   }
00264   my $constraint;
00265   if (ref($biotype) eq 'ARRAY'){
00266       $constraint = "g.biotype IN (";
00267       foreach my $b (@{$biotype}){
00268       $constraint .= "?,";    
00269       $self->bind_param_generic_fetch($b,SQL_VARCHAR);
00270       }
00271       chop($constraint); #remove last , from expression
00272       $constraint .= ") and g.is_current = 1";
00273       
00274   }
00275   else{
00276       $constraint = "g.biotype = ? and g.is_current = 1";
00277       $self->bind_param_generic_fetch($biotype,SQL_VARCHAR);
00278   }
00279   my @genes  = @{ $self->generic_fetch($constraint) };
00280   return \@genes ;
00281 }
00282 
00283 
00284 sub fetch_all {
00285   my ($self) = @_;
00286 
00287   my $constraint = 'g.biotype != "LRG_gene" and g.is_current = 1';
00288   my @genes  = @{ $self->generic_fetch($constraint) };
00289   return \@genes ;
00290 }
00291 
00292 
00293 =head2 fetch_all_versions_by_stable_id 
00294 
00295   Arg [1]     : String $stable_id 
00296                 The stable ID of the gene to retrieve
00297   Example     : $gene = $gene_adaptor->fetch_all_versions_by_stable_id
00298                   ('ENSG00000148944');
00299   Description : Similar to fetch_by_stable_id, but retrieves all versions of a
00300                 gene stored in the database.
00301   Returntype  : listref of Bio::EnsEMBL::Gene
00302   Exceptions  : if we cant get the gene in given coord system
00303   Caller      : general
00304   Status      : At Risk
00305 
00306 =cut
00307 
00308 sub fetch_all_versions_by_stable_id {
00309   my ($self, $stable_id) = @_;
00310 
00311   my $constraint = "g.stable_id = ?";
00312   $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
00313   return $self->generic_fetch($constraint);
00314 }
00315 
00316 
00317 =head2 fetch_by_exon_stable_id
00318 
00319   Arg [1]    : String $id
00320                The stable id of an exon of the gene to retrieve
00321   Example    : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944');
00322   Description: Retrieves a gene object from the database via an exon stable id.
00323                The gene will be retrieved in its native coordinate system (i.e.
00324                in the coordinate system it is stored in the database). It may
00325                be converted to a different coordinate system through a call to
00326                transform() or transfer(). If the gene or exon is not found
00327                undef is returned instead.
00328   Returntype : Bio::EnsEMBL::Gene or undef
00329   Exceptions : none
00330   Caller     : general
00331   Status     : Stable
00332 
00333 =cut
00334 
00335 sub fetch_by_exon_stable_id {
00336   my ($self, $stable_id, $version) = @_;
00337   
00338   my $sql = qq(
00339       SELECT t.gene_id
00340         FROM transcript as t,
00341              exon_transcript as et,
00342              exon as e
00343        WHERE t.transcript_id = et.transcript_id 
00344          AND et.exon_id = e.exon_id
00345          AND e.stable_id = ?
00346          AND e.is_current = 1
00347   );
00348 
00349   my $sth = $self->prepare($sql);
00350   $sth->bind_param(1, $stable_id, SQL_VARCHAR);
00351   $sth->execute();
00352 
00353   my ($dbID) = $sth->fetchrow_array();
00354 
00355   return undef if(!defined($dbID));
00356 
00357   my $gene = $self->fetch_by_dbID($dbID);
00358 
00359   return $gene;
00360 }
00361 
00362 
00363 =head2 fetch_all_by_domain
00364 
00365   Arg [1]    : String $domain
00366                The domain to fetch genes from
00367   Example    : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) };
00368   Description: Retrieves a listref of genes whose translation contain interpro
00369                domain $domain. The genes are returned in their native coord
00370                system (i.e. the coord_system they are stored in). If the coord
00371                system needs to be changed, then tranform or transfer should be
00372                called on the individual objects returned.
00373   Returntype : list of Bio::EnsEMBL::Genes
00374   Exceptions : none
00375   Caller     : domainview
00376   Status     : Stable
00377 
00378 =cut
00379 
00380 sub fetch_all_by_domain {
00381   my ($self, $domain) = @_;
00382 
00383   throw("domain argument is required") unless ($domain);
00384 
00385   my $sth = $self->prepare(qq(
00386   SELECT    tr.gene_id
00387   FROM      interpro i,
00388             protein_feature pf,
00389             transcript tr,
00390             translation tl,
00391             seq_region sr,
00392             coord_system cs
00393   WHERE     cs.species_id = ?
00394     AND     cs.coord_system_id = sr.coord_system_id
00395     AND     sr.seq_region_id = tr.seq_region_id
00396     AND     tr.is_current = 1
00397     AND     tr.transcript_id = tl.transcript_id
00398     AND     tl.translation_id = pf.translation_id
00399     AND     pf.hit_name = i.id
00400     AND     i.interpro_ac = ?
00401   GROUP BY  tr.gene_id));
00402 
00403   $sth->bind_param( 1, $self->species_id(), SQL_VARCHAR );
00404   $sth->bind_param( 2, $domain,             SQL_VARCHAR );
00405 
00406   $sth->execute();
00407 
00408   my @array = @{$sth->fetchall_arrayref()};
00409   $sth->finish();
00410   
00411   my @gene_ids = map {$_->[0]} @array;
00412 
00413   return $self->fetch_all_by_dbID_list(\@gene_ids);
00414 }
00415 
00416 
00417 
00418 =head2 fetch_all_by_Slice_and_external_dbname_link
00419 
00420   Arg [1]    : Bio::EnsEMBL::Slice $slice
00421                The slice to fetch genes on.
00422   Arg [2]    : (optional) string $logic_name
00423                the logic name of the type of features to obtain
00424   Arg [3]    : (optional) boolean $load_transcripts
00425                if true, transcripts will be loaded immediately
00426                rather than lazy loaded later.
00427   Arg [4]    : Name of the external database
00428   Example    : @genes = @{
00429                  $ga->fetch_all_by_Slice_and_external_dbname_link(
00430                                           $slice, undef, undef, "HUGO" ) };
00431   Description: Overrides superclass method to optionally load
00432                transcripts immediately rather than lazy-loading them
00433                later.  This is more efficient when there are a lot
00434                of genes whose transcripts are going to be used. The
00435                genes are then filtered to return only those with
00436                external database links of the type specified
00437   Returntype : reference to list of genes
00438   Exceptions : thrown if exon cannot be placed on transcript slice
00439   Caller     : 
00440   Status     : Stable
00441 
00442 =cut
00443 
00444 sub fetch_all_by_Slice_and_external_dbname_link {
00445   my ( $self, $slice, $logic_name, $load_transcripts, $db_name ) = @_;
00446 
00447   # Get the external_db_id(s) from the name.
00448   my $sth = $self->prepare(
00449             "SELECT external_db_id FROM external_db WHERE db_name = ?");
00450 
00451   $sth->bind_param( 1, $db_name, SQL_VARCHAR );
00452   $sth->execute();
00453 
00454   my $external_db_id;
00455   $sth->bind_columns( \$external_db_id );
00456 
00457   my @external_db_ids;
00458   while ( $sth->fetch() ) {
00459     push( @external_db_ids, $external_db_id );
00460   }
00461 
00462   if ( scalar(@external_db_ids) == 0 ) {
00463     warn sprintf( "Could not find external database "
00464                     . "'%s' in the external_db table\n"
00465                     . "Available are:\n",
00466                   $db_name );
00467 
00468     $sth = $self->prepare("SELECT DISTINCT db_name FROM external_db");
00469 
00470     $sth->execute();
00471     $sth->bind_columns( \$external_db_id );
00472 
00473     while ( $sth->fetch() ) {
00474       warn "\t$external_db_id\n";
00475     }
00476     return [];
00477   }
00478 
00479   # Get the gene_ids for those with links.
00480   my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();
00481 
00482   my %linked_genes;
00483   foreach $external_db_id (@external_db_ids) {
00484     my @linked_genes =
00485       $dbe_adaptor->list_gene_ids_by_external_db_id($external_db_id);
00486 
00487     foreach my $gene_id (@linked_genes) {
00488       $linked_genes{$gene_id} = 1;
00489     }
00490   }
00491 
00492   # Get all the genes on the slice.
00493   my $genes = $self->SUPER::fetch_all_by_Slice_constraint( $slice,
00494                                       'g.is_current = 1', $logic_name );
00495 
00496   # Create a list of those that are in the gene_ids list.
00497   my @genes_passed;
00498   foreach my $gene (@$genes) {
00499     if ( exists( $linked_genes{ $gene->dbID() } ) ) {
00500       push( @genes_passed, $gene );
00501     }
00502   }
00503 
00504   # Return the list of those that passed.
00505   return \@genes_passed;
00506 } ## end sub fetch_all_by_Slice_and_external_dbname_link
00507 
00508 =head2 fetch_all_by_Slice
00509 
00510   Arg [1]    : Bio::EnsEMBL::Slice $slice
00511                The slice to fetch genes on.
00512   Arg [2]    : (optional) string $logic_name
00513                the logic name of the type of features to obtain
00514   Arg [3]    : (optional) boolean $load_transcripts
00515                if true, transcripts will be loaded immediately rather than
00516                lazy loaded later.
00517   Arg [4]    : (optional) string $source
00518                the source name of the features to obtain.
00519   Arg [5]    : (optional) string biotype
00520                 the biotype of the features to obtain.
00521   Example    : @genes = @{$gene_adaptor->fetch_all_by_Slice()};
00522   Description: Overrides superclass method to optionally load transcripts
00523                immediately rather than lazy-loading them later.  This
00524                is more efficient when there are a lot of genes whose
00525                transcripts are going to be used.
00526   Returntype : reference to list of genes 
00527   Exceptions : thrown if exon cannot be placed on transcript slice
00528   Caller     : Slice::get_all_Genes
00529   Status     : Stable
00530 
00531 =cut
00532 
00533 sub fetch_all_by_Slice {
00534   my ( $self, $slice, $logic_name, $load_transcripts, $source,
00535     $biotype ) = @_;
00536 
00537   my $constraint = 'g.is_current = 1';
00538 
00539   if ( defined($source) ) {
00540     $constraint .= " and g.source = '$source'";
00541   }
00542   if ( defined($biotype) ) {
00543     $constraint .= " and g.biotype = '$biotype'";
00544   }
00545 
00546   my $genes =
00547     $self->SUPER::fetch_all_by_Slice_constraint( $slice, $constraint,
00548     $logic_name );
00549 
00550   # If there are less than two genes, still do lazy-loading.
00551   if ( !$load_transcripts || @$genes < 2 ) {
00552     return $genes;
00553   }
00554 
00555   # Preload all of the transcripts now, instead of lazy loading later,
00556   # faster than one query per transcript.
00557 
00558   # First check if transcripts are already preloaded.
00559   # FIXME: Should check all transcripts.
00560   if ( exists( $genes->[0]->{'_transcript_array'} ) ) {
00561     return $genes;
00562   }
00563 
00564   # Get extent of region spanned by transcripts.
00565   my ( $min_start, $max_end );
00566   foreach my $g (@$genes) {
00567     if ( !defined($min_start) || $g->seq_region_start() < $min_start ) {
00568       $min_start = $g->seq_region_start();
00569     }
00570     if ( !defined($max_end) || $g->seq_region_end() > $max_end ) {
00571       $max_end = $g->seq_region_end();
00572     }
00573   }
00574 
00575   my $ext_slice;
00576 
00577   if ( $min_start >= $slice->start() && $max_end <= $slice->end() ) {
00578     $ext_slice = $slice;
00579   } else {
00580     my $sa = $self->db()->get_SliceAdaptor();
00581     $ext_slice = $sa->fetch_by_region(
00582       $slice->coord_system->name(), $slice->seq_region_name(),
00583       $min_start,                   $max_end,
00584       $slice->strand(),             $slice->coord_system->version() );
00585   }
00586 
00587   # Associate transcript identifiers with genes.
00588 
00589   my %g_hash = map { $_->dbID => $_ } @{$genes};
00590 
00591   my $g_id_str = join( ',', keys(%g_hash) );
00592 
00593   my $sth =
00594     $self->prepare( "SELECT gene_id, transcript_id "
00595       . "FROM   transcript "
00596       . "WHERE  gene_id IN ($g_id_str)" );
00597 
00598   $sth->execute();
00599 
00600   my ( $g_id, $tr_id );
00601   $sth->bind_columns( \( $g_id, $tr_id ) );
00602 
00603   my %tr_g_hash;
00604 
00605   while ( $sth->fetch() ) {
00606     $tr_g_hash{$tr_id} = $g_hash{$g_id};
00607   }
00608 
00609   my $ta          = $self->db()->get_TranscriptAdaptor();
00610   my $transcripts = $ta->fetch_all_by_Slice(
00611     $ext_slice,
00612     1, undef,
00613     sprintf( "t.transcript_id IN (%s)",
00614       join( ',', sort { $a <=> $b } keys(%tr_g_hash) ) ) );
00615 
00616   # Move transcripts onto gene slice, and add them to genes.
00617   foreach my $tr ( @{$transcripts} ) {
00618     if ( !exists( $tr_g_hash{ $tr->dbID() } ) ) { next }
00619 
00620     my $new_tr;
00621     if ( $slice != $ext_slice ) {
00622       $new_tr = $tr->transfer($slice);
00623       if ( !defined($new_tr) ) {
00624         throw("Unexpected. "
00625             . "Transcript could not be transfered onto Gene slice." );
00626       }
00627     } else {
00628       $new_tr = $tr;
00629     }
00630 
00631     $tr_g_hash{ $tr->dbID() }->add_Transcript($new_tr);
00632   }
00633 
00634   return $genes;
00635 } ## end sub fetch_all_by_Slice
00636 
00637 =head2 fetch_by_transcript_id
00638 
00639   Arg [1]    : Int $trans_id
00640                Unique database identifier for the transcript whose gene should
00641                be retrieved. The gene is returned in its native coord
00642                system (i.e. the coord_system it is stored in). If the coord
00643                system needs to be changed, then tranform or transfer should
00644                be called on the returned object. undef is returned if the
00645                gene or transcript is not found in the database.
00646   Example    : $gene = $gene_adaptor->fetch_by_transcript_id(1241);
00647   Description: Retrieves a gene from the database via the database identifier
00648                of one of its transcripts.
00649   Returntype : Bio::EnsEMBL::Gene
00650   Exceptions : none
00651   Caller     : general
00652   Status     : Stable
00653 
00654 =cut
00655 
00656 sub fetch_by_transcript_id {
00657   my ($self, $trans_id) = @_;
00658 
00659   # this is a cheap SQL call
00660   my $sth = $self->prepare(qq(
00661       SELECT tr.gene_id
00662       FROM transcript tr
00663       WHERE tr.transcript_id = ?
00664   ));
00665 
00666   $sth->bind_param(1, $trans_id, SQL_INTEGER);
00667   $sth->execute();
00668 
00669   my ($geneid) = $sth->fetchrow_array();
00670 
00671   $sth->finish();
00672 
00673   return undef if( !defined $geneid );
00674 
00675   my $gene = $self->fetch_by_dbID($geneid);
00676   return $gene;
00677 }
00678 
00679 
00680 =head2 fetch_by_transcript_stable_id
00681 
00682   Arg [1]    : string $trans_stable_id
00683                transcript stable ID whose gene should be retrieved
00684   Example    : my $gene = $gene_adaptor->fetch_by_transcript_stable_id
00685                  ('ENST0000234');
00686   Description: Retrieves a gene from the database via the stable ID of one of
00687                its transcripts
00688   Returntype : Bio::EnsEMBL::Gene
00689   Exceptions : none
00690   Caller     : general
00691   Status     : Stable
00692 
00693 =cut
00694 
00695 sub fetch_by_transcript_stable_id {
00696     my ($self, $trans_stable_id) = @_;
00697 
00698     my $sth = $self->prepare(qq(
00699         SELECT  gene_id
00700     FROM    transcript
00701         WHERE   stable_id = ?
00702         AND     is_current = 1
00703     ));
00704 
00705     $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
00706     $sth->execute();
00707 
00708     my ($geneid) = $sth->fetchrow_array();
00709     $sth->finish;
00710     
00711     return undef if (!defined $geneid);
00712 
00713     my $gene = $self->fetch_by_dbID($geneid);
00714     return $gene;
00715 }
00716 
00717 
00718 =head2 fetch_by_translation_stable_id
00719 
00720   Arg [1]    : String $translation_stable_id
00721                The stable id of a translation of the gene to be obtained
00722   Example    : my $gene = $gene_adaptor->fetch_by_translation_stable_id
00723                  ('ENSP00000278194');
00724   Description: Retrieves a gene via the stable id of one of its translations.
00725   Returntype : Bio::EnsEMBL::Gene
00726   Exceptions : none
00727   Caller     : general
00728   Status     : Stable
00729 
00730 =cut
00731 
00732 sub fetch_by_translation_stable_id {
00733     my ($self, $translation_stable_id) = @_;
00734 
00735     my $sth = $self->prepare(qq(
00736         SELECT  tr.gene_id
00737     FROM    transcript tr,
00738                 translation tl
00739     WHERE   tl.stable_id = ?
00740         AND     tr.transcript_id = tl.transcript_id
00741         AND     tr.is_current = 1
00742     ));
00743 
00744     $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR);
00745     $sth->execute();
00746 
00747     my ($geneid) = $sth->fetchrow_array();
00748     $sth->finish;
00749     if( !defined $geneid ) {
00750         return undef;
00751     }
00752     return $self->fetch_by_dbID($geneid);
00753 }
00754 
00755 
00756 
00757 
00758 =head2 fetch_all_by_external_name
00759 
00760   Arg [1]    : String $external_name
00761                The external identifier for the gene to be obtained
00762   Arg [2]    : (optional) String $external_db_name
00763                The name of the external database from which the
00764                identifier originates.
00765   Example    : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')}
00766                @many_genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA%')}
00767   Description: Retrieves a list of genes with an external database
00768                identifier $external_name. The genes returned are in
00769                their native coordinate system, i.e. in the coordinate
00770                system they are stored in the database in.  If another
00771                coordinate system is required then the Gene::transfer or
00772                Gene::transform method can be used.
00773                SQL wildcards % and _ are supported in the $external_name
00774   Returntype : listref of Bio::EnsEMBL::Gene
00775   Exceptions : none
00776   Caller     : goview, general
00777   Status     : Stable
00778 
00779 =cut
00780 
00781 sub fetch_all_by_external_name {
00782   my ( $self, $external_name, $external_db_name ) = @_;
00783 
00784   my $entryAdaptor = $self->db->get_DBEntryAdaptor();
00785 
00786   my @ids =
00787     $entryAdaptor->list_gene_ids_by_extids( $external_name,
00788                                             $external_db_name );
00789 
00790   my %genes_by_dbIDs =
00791     map { $_->dbID(), $_ } @{ $self->fetch_all_by_dbID_list( \@ids ) };
00792 
00793   my @result = map { $genes_by_dbIDs{$_} } @ids;
00794 
00795   return \@result;
00796 }
00797 
00798 =head2 fetch_all_by_GOTerm
00799 
00800   Arg [1]   : Bio::EnsEMBL::OntologyTerm
00801               The GO term for which genes should be fetched.
00802 
00803   Example:  @genes = @{
00804               $gene_adaptor->fetch_all_by_GOTerm(
00805                 $go_adaptor->fetch_by_accession('GO:0030326') ) };
00806 
00807   Description   : Retrieves a list of genes that are associated with
00808                   the given GO term, or with any of its descendent
00809                   GO terms.  The genes returned are in their native
00810                   coordinate system, i.e. in the coordinate system
00811                   in which they are stored in the database.  If
00812                   another coordinate system is required then the
00813                   Gene::transfer or Gene::transform method can be
00814                   used.
00815 
00816   Return type   : listref of Bio::EnsEMBL::Gene
00817   Exceptions    : Throws of argument is not a GO term
00818   Caller        : general
00819   Status        : Stable
00820 
00821 =cut
00822 
00823 sub fetch_all_by_GOTerm {
00824   my ( $self, $term ) = @_;
00825 
00826   assert_ref( $term, 'Bio::EnsEMBL::OntologyTerm' );
00827   if ( $term->ontology() ne 'GO' ) {
00828     throw('Argument is not a GO term');
00829   }
00830 
00831   my $entryAdaptor = $self->db->get_DBEntryAdaptor();
00832 
00833   my %unique_dbIDs;
00834   foreach my $accession ( map { $_->accession() }
00835                           ( $term, @{ $term->descendants() } ) )
00836   {
00837     my @ids =
00838       $entryAdaptor->list_gene_ids_by_extids( $accession, 'GO' );
00839     foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 }
00840   }
00841 
00842   my @result = @{
00843     $self->fetch_all_by_dbID_list(
00844                               [ sort { $a <=> $b } keys(%unique_dbIDs) ]
00845     ) };
00846 
00847   return \@result;
00848 } ## end sub fetch_all_by_GOTerm
00849 
00850 =head2 fetch_all_by_GOTerm_accession
00851 
00852   Arg [1]   : String
00853               The GO term accession for which genes should be
00854               fetched.
00855 
00856   Example   :
00857 
00858     @genes =
00859       @{ $gene_adaptor->fetch_all_by_GOTerm_accession(
00860         'GO:0030326') };
00861 
00862   Description   : Retrieves a list of genes that are associated with
00863                   the given GO term, or with any of its descendent
00864                   GO terms.  The genes returned are in their native
00865                   coordinate system, i.e. in the coordinate system
00866                   in which they are stored in the database.  If
00867                   another coordinate system is required then the
00868                   Gene::transfer or Gene::transform method can be
00869                   used.
00870 
00871   Return type   : listref of Bio::EnsEMBL::Gene
00872   Exceptions    : Throws of argument is not a GO term accession
00873   Caller        : general
00874   Status        : Stable
00875 
00876 =cut
00877 
00878 sub fetch_all_by_GOTerm_accession {
00879   my ( $self, $accession ) = @_;
00880 
00881   if ( $accession !~ /^GO:/ ) {
00882     throw('Argument is not a GO term accession');
00883   }
00884 
00885   my $goAdaptor =
00886     Bio::EnsEMBL::Registry->get_adaptor( 'Multi', 'Ontology',
00887                                          'OntologyTerm' );
00888 
00889   my $term = $goAdaptor->fetch_by_accession($accession);
00890 
00891   return $self->fetch_all_by_GOTerm($term);
00892 }
00893 
00894 =head2 fetch_all_alt_alleles
00895 
00896   Arg [1]    : Bio::EnsEMBL::Gene $gene
00897                The gene to fetch alternative alleles for
00898   Example    : my @alt_genes = @{ $gene_adaptor->fetch_all_alt_alleles($gene) };
00899                foreach my $alt_gene (@alt_genes) {
00900                  print "Alternate allele: " . $alt_gene->stable_id() . "\n";
00901                }
00902   Description: Retrieves genes which are alternate alleles to a provided gene.
00903                Alternate alleles in Ensembl are genes which are similar and are
00904                on an alternative haplotype of the same region. There are not 
00905                currently very many of these. This method will return a 
00906                reference to an empty list if no alternative alleles are found.
00907   Returntype : listref of Bio::EnsEMBL::Genes
00908   Exceptions : throw if incorrect arg provided
00909                warning if gene arg does not have dbID
00910   Caller     : Gene::get_all_alt_alleles
00911   Status     : Stable
00912 
00913 =cut
00914 
00915 sub fetch_all_alt_alleles {
00916   my $self = shift;
00917   my $gene = shift;
00918 
00919   if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
00920     throw('Bio::EnsEMBL::Gene argument is required');
00921   }
00922 
00923   my $gene_id = $gene->dbID();
00924 
00925   if(!$gene_id) {
00926     warning('Cannot retrieve alternate alleles for gene without dbID');
00927     return [];
00928   }
00929 
00930   my $sth = $self->prepare("SELECT aa1.gene_id " .
00931                            "FROM   alt_allele aa1, alt_allele aa2 " .
00932                            "WHERE  aa1.alt_allele_id = aa2.alt_allele_id " .
00933                            "AND    aa2.gene_id = ? " .
00934                            "AND    aa1.gene_id <> ?");
00935 
00936   $sth->bind_param(1, $gene_id, SQL_INTEGER);
00937   $sth->bind_param(2, $gene_id, SQL_INTEGER);
00938   $sth->execute();
00939 
00940   my @alt_ids;
00941   my $row;
00942   while($row = $sth->fetchrow_arrayref()) {
00943     push @alt_ids, $row->[0];
00944   } 
00945   $sth->finish();
00946   
00947   if (@alt_ids) {
00948     return $self->fetch_all_by_dbID_list(\@alt_ids);
00949   }
00950   
00951   return [];
00952 }
00953 
00954 sub is_ref{
00955   my ( $self, $gene_id) = @_;
00956   my $is_not_ref;
00957 
00958   # easier to find if it is not an alt_Allele do this and then negate it.
00959   my $sth = $self->prepare("select count(1) from alt_allele where gene_id = $gene_id and is_ref = 0");
00960   $sth->execute();
00961   $sth->bind_columns(\$is_not_ref);
00962   $sth->fetch;
00963 
00964   if(defined($is_not_ref) and $is_not_ref){
00965     return 0;
00966   }
00967  
00968   return 1;
00969 }
00970 
00971 
00972 =head2 store_alt_alleles
00973 
00974 
00975   Arg [1]    : reference to list of Bio::EnsEMBL::Genes $genes
00976   Example    : $gene_adaptor->store_alt_alleles([$gene1, $gene2, $gene3]);
00977   Description: This method creates a group of alternative alleles (i.e. locus)
00978                from a set of genes. The genes should be genes from alternate
00979                haplotypes which are similar. The genes must already be stored
00980                in this database. At least 2 genes must be in the list reference
00981                provided.
00982   Returntype : none
00983   Exceptions : throw on incorrect arguments
00984                throw on sql error (e.g. duplicate unique id)
00985   Caller     : general
00986   Status     : Stable
00987 
00988 =cut
00989 
00990 sub store_alt_alleles {
00991   my $self = shift;
00992   my $genes = shift;
00993 
00994   if(!ref($genes) eq 'ARRAY') {
00995     throw('List reference of Bio::EnsEMBL::Gene argument expected.');
00996   }
00997 
00998   my $num_genes = scalar(@$genes);
00999 
01000   if($num_genes < 2) {
01001     throw("At least 2 genes must be provided to construct alternate alleles.");
01002   }
01003 
01004   return if(!@$genes);
01005   
01006   #
01007   #insert the first gene seperately in order to get a unique identifier for
01008   #the set of alleles
01009   #
01010   my $gene = $genes->[0];
01011 
01012   if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
01013     throw('List reference of Bio::EnsEMBL::Gene argument expected.');
01014   }
01015 
01016   my $gene_id = $gene->dbID();
01017 
01018   if (!$gene_id) {
01019     throw("Genes must have dbIDs in order to construct alternate alleles.");
01020   }
01021 
01022   my $sth = $self->prepare("INSERT INTO alt_allele (gene_id) VALUES (?)");
01023   $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
01024   $sth->execute();
01025   
01026   my $alt_allele_id = $sth->{'mysql_insertid'};
01027   $sth->finish();
01028 
01029   #
01030   # Insert all subsequent alt alleles using the alt_allele identifier
01031   # from the first insert
01032   #
01033 
01034   $sth = $self->prepare("INSERT INTO alt_allele (alt_allele_id, gene_id) " .
01035                         "VALUES (?,?)");
01036   
01037   for (my $i = 1; $i < $num_genes; $i++) {
01038     my $gene = $genes->[$i];
01039 
01040     if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
01041       throw("List reference of Bio::EnsEMBL::Gene argument expected"); 
01042     }
01043     
01044     $gene_id = $gene->dbID();
01045     
01046     if (!$gene_id) {
01047       # This is an error but we have already inserted into the database
01048       # delete the already inserted entries to restore the state of the
01049       # database
01050       $sth->finish();
01051       $sth->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?");
01052       $sth->bind_param(1, $alt_allele_id, SQL_INTEGER);
01053       $sth->execute();
01054       $sth->finish();
01055       throw('Genes must have dbIDs in order to construct alternate alleles.');
01056     }
01057 
01058     $sth->bind_param(1, $alt_allele_id, SQL_INTEGER);
01059     $sth->bind_param(2, $gene_id, SQL_INTEGER);
01060     eval {
01061     $sth->execute();
01062     };
01063 
01064     if ($@) {
01065       # an error occured, revert the db to the previous state
01066       $sth = $self->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?");
01067       $sth->bind_param(1, $alt_allele_id, SQL_INTEGER);
01068       $sth->execute();
01069       $sth->finish();
01070       throw("An SQL error occured inserting alternate alleles:\n$@");
01071     }
01072   }
01073   
01074   $sth->finish();
01075 
01076   return;
01077 }
01078 
01079 
01080 =head2 store
01081 
01082   Arg [1]    : Bio::EnsEMBL::Gene $gene
01083                The gene to store in the database
01084   Arg [2]    : ignore_release in xrefs [default 1] set to 0 to use release info 
01085                in external database references
01086   Example    : $gene_adaptor->store($gene);
01087   Description: Stores a gene in the database.
01088   Returntype : the database identifier (dbID) of the newly stored gene
01089   Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene or if 
01090                $gene does not have an analysis object
01091   Caller     : general
01092   Status     : Stable
01093 
01094 =cut
01095 
01096 sub store {
01097   my ($self, $gene, $ignore_release) = @_;
01098 
01099   if (!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
01100     throw("Must store a gene object, not a $gene");
01101   }
01102   if(!defined($ignore_release)){
01103     $ignore_release = 1;
01104   }
01105   my $db = $self->db();
01106 
01107   if ($gene->is_stored($db)) {
01108     return $gene->dbID();
01109   }
01110 
01111   # ensure coords are correct before storing
01112   $gene->recalculate_coordinates();
01113 
01114   my $analysis = $gene->analysis();
01115   throw("Genes must have an analysis object.") if(!defined($analysis));
01116 
01117   my $analysis_id;
01118   if ($analysis->is_stored($db)) {
01119     $analysis_id = $analysis->dbID();
01120   } else {
01121     $analysis_id = $db->get_AnalysisAdaptor->store($analysis);
01122   }
01123 
01124   my $type = $gene->biotype || "";
01125 
01126   # default to is_current = 1 if this attribute is not set
01127   my $is_current = $gene->is_current;
01128   $is_current = 1 unless (defined($is_current));
01129 
01130   my $original             = $gene;
01131   my $original_transcripts = $gene->get_all_Transcripts();
01132 
01133   my $seq_region_id;
01134 
01135   ( $gene, $seq_region_id ) = $self->_pre_store($gene);
01136 
01137   my $store_gene_sql = qq(
01138         INSERT INTO gene
01139            SET biotype = ?,
01140                analysis_id = ?,
01141                seq_region_id = ?,
01142                seq_region_start = ?,
01143                seq_region_end = ?,
01144                seq_region_strand = ?,
01145                description = ?,
01146                source = ?,
01147                status = ?,
01148                is_current = ?,
01149                canonical_transcript_id = ?,
01150                canonical_annotation = ?
01151   );
01152 
01153   if (defined($gene->stable_id)) {
01154       my $created = $self->db->dbc->from_seconds_to_date($gene->created_date());
01155       my $modified = $self->db->dbc->from_seconds_to_date($gene->modified_date());
01156       $store_gene_sql .= ", stable_id = ?, version = ?, created_date = " . $created . " , modified_date = " . $modified;
01157 
01158   }
01159 
01160   # column status is used from schema version 34 onwards (before it was
01161   # confidence)
01162 
01163   my $sth = $self->prepare($store_gene_sql);
01164   $sth->bind_param( 1,  $type,                SQL_VARCHAR );
01165   $sth->bind_param( 2,  $analysis_id,         SQL_INTEGER );
01166   $sth->bind_param( 3,  $seq_region_id,       SQL_INTEGER );
01167   $sth->bind_param( 4,  $gene->start(),       SQL_INTEGER );
01168   $sth->bind_param( 5,  $gene->end(),         SQL_INTEGER );
01169   $sth->bind_param( 6,  $gene->strand(),      SQL_TINYINT );
01170   $sth->bind_param( 7,  $gene->description(), SQL_LONGVARCHAR );
01171   $sth->bind_param( 8,  $gene->source(),      SQL_VARCHAR );
01172   $sth->bind_param( 9,  $gene->status(),      SQL_VARCHAR );
01173   $sth->bind_param( 10, $is_current,          SQL_TINYINT );
01174 
01175   # Canonical transcript ID will be updated later.
01176   # Set it to zero for now.
01177   $sth->bind_param( 11, 0, SQL_TINYINT );
01178 
01179   $sth->bind_param( 12, $gene->canonical_annotation(), SQL_VARCHAR );
01180 
01181   if ( defined($gene->stable_id) ) {
01182 
01183      $sth->bind_param( 13, $gene->stable_id, SQL_VARCHAR );
01184      my $version = ($gene->version()) ? $gene->version() : 1;
01185      $sth->bind_param( 14, $version, SQL_INTEGER );
01186   }
01187 
01188   $sth->execute();
01189   $sth->finish();
01190 
01191   my $gene_dbID = $sth->{'mysql_insertid'};
01192 
01193   # store the dbentries associated with this gene
01194   my $dbEntryAdaptor = $db->get_DBEntryAdaptor();
01195 
01196   foreach my $dbe ( @{ $gene->get_all_DBEntries } ) {
01197     $dbEntryAdaptor->store( $dbe, $gene_dbID, "Gene", $ignore_release );
01198   }
01199 
01200   # We allow transcripts not to share equal exons and instead have
01201   # copies.  For the database we still want sharing though, to have
01202   # easier time with stable ids. So we need to have a step to merge
01203   # exons together before store.
01204   my %exons;
01205 
01206   foreach my $trans ( @{$gene->get_all_Transcripts} ) {
01207     foreach my $e ( @{$trans->get_all_Exons} ) {
01208       my $key = $e->hashkey();
01209       if( exists $exons{ $key } ) {
01210         $trans->swap_exons( $e, $exons{$key} );
01211       } else {
01212         $exons{$key} = $e;
01213       }
01214     }
01215   }
01216 
01217   my $transcript_adaptor = $db->get_TranscriptAdaptor();
01218 
01219   my $transcripts = $gene->get_all_Transcripts();
01220 
01221   my $new_canonical_transcript_id;
01222   for ( my $i = 0; $i < @$transcripts; $i++ ) {
01223     my $new = $transcripts->[$i];
01224     my $old = $original_transcripts->[$i];
01225 
01226     $transcript_adaptor->store( $new, $gene_dbID, $analysis_id );
01227 
01228     if ( !defined($new_canonical_transcript_id)
01229          && $new->is_canonical() )
01230     {
01231       $new_canonical_transcript_id = $new->dbID();
01232     }
01233 
01234     # update the original transcripts since we may have made copies of
01235     # them by transforming the gene
01236     $old->dbID( $new->dbID() );
01237     $old->adaptor( $new->adaptor() );
01238 
01239     if ( $new->translation ) {
01240       $old->translation->dbID( $new->translation()->dbID );
01241       $old->translation->adaptor( $new->translation()->adaptor );
01242     }
01243   }
01244 
01245   if ( defined($new_canonical_transcript_id) ) {
01246     # Now the canonical transcript has been stored, so update the
01247     # canonical_transcript_id of this gene with the new dbID.
01248     my $sth = $self->prepare(
01249       q(
01250       UPDATE gene
01251       SET canonical_transcript_id = ?
01252       WHERE gene_id = ?)
01253     );
01254 
01255     $sth->bind_param( 1, $new_canonical_transcript_id, SQL_INTEGER );
01256     $sth->bind_param( 2, $gene_dbID, SQL_INTEGER );
01257 
01258     $sth->execute();
01259     $sth->finish();
01260   }
01261 
01262   # update gene to point to display xref if it is set
01263   if(my $display_xref = $gene->display_xref) {
01264     my $dxref_id;
01265     if($display_xref->is_stored($db)) {
01266       $dxref_id = $display_xref->dbID();
01267     } else {
01268       $dxref_id = $dbEntryAdaptor->exists($display_xref);
01269     }
01270 
01271     if(defined($dxref_id)) {
01272       my $sth = $self->prepare
01273         ("UPDATE gene SET display_xref_id = ? WHERE gene_id = ?");
01274       $sth->bind_param(1, $dxref_id, SQL_INTEGER);
01275       $sth->bind_param(2, $gene_dbID, SQL_INTEGER);
01276       $sth->execute();
01277       $sth->finish();
01278       $display_xref->dbID($dxref_id);
01279       $display_xref->adaptor($dbEntryAdaptor);
01280       $display_xref->dbID($dxref_id);
01281       $display_xref->adaptor($dbEntryAdaptor);
01282     } else {
01283       warning("Display_xref ".$display_xref->dbname().":".
01284               $display_xref->display_id() . " is not stored in database.\n".
01285               "Not storing relationship to this gene.");
01286       $display_xref->dbID(undef);
01287       $display_xref->adaptor(undef);
01288     }
01289   }
01290 
01291   # store gene attributes if there are any
01292   my $attr_adaptor = $db->get_AttributeAdaptor();
01293   $attr_adaptor->store_on_Gene($gene_dbID, $gene->get_all_Attributes);
01294 
01295   # store unconventional transcript associations if there are any
01296   my $utaa = $db->get_UnconventionalTranscriptAssociationAdaptor();
01297   foreach my $uta (@{$gene->get_all_unconventional_transcript_associations()}) {
01298     $utaa->store($uta);
01299   }
01300 
01301   # set the adaptor and dbID on the original passed in gene not the
01302   # transfered copy
01303   $original->adaptor($self);
01304   $original->dbID($gene_dbID);
01305 
01306   return $gene_dbID;
01307 }
01308 
01309 
01310 =head2 remove
01311 
01312   Arg [1]    : Bio::EnsEMBL::Gene $gene
01313                the gene to remove from the database
01314   Example    : $gene_adaptor->remove($gene);
01315   Description: Removes a gene completely from the database. All associated
01316                transcripts, exons, stable_identifiers, descriptions, etc.
01317                are removed as well. Use with caution!
01318   Returntype : none
01319   Exceptions : throw on incorrect arguments 
01320                warning if gene is not stored in this database
01321   Caller     : general
01322   Status     : Stable
01323 
01324 =cut
01325 
01326 sub remove {
01327   my $self = shift;
01328   my $gene = shift;
01329 
01330   if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
01331     throw("Bio::EnsEMBL::Gene argument expected.");
01332   }
01333 
01334   if ( !$gene->is_stored($self->db()) ) {
01335     warning("Cannot remove gene " . $gene->dbID() . ". Is not stored in " .
01336             "this database.");
01337     return;
01338   }
01339 
01340   # remove all object xrefs associated with this gene
01341 
01342   my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();
01343   foreach my $dbe (@{$gene->get_all_DBEntries()}) {
01344     $dbe_adaptor->remove_from_object($dbe, $gene, 'Gene');
01345   }
01346 
01347   # remove all alternative allele entries associated with this gene
01348   my $sth = $self->prepare("DELETE FROM alt_allele WHERE gene_id = ?");
01349   $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
01350   $sth->execute();
01351   $sth->finish();
01352 
01353   # remove the attributes associated with this transcript
01354   my $attrib_adaptor = $self->db->get_AttributeAdaptor;  
01355   $attrib_adaptor->remove_from_Gene($gene);
01356 
01357   # remove all of the transcripts associated with this gene
01358   my $transcriptAdaptor = $self->db->get_TranscriptAdaptor();
01359   foreach my $trans ( @{$gene->get_all_Transcripts()} ) {
01360     $transcriptAdaptor->remove($trans);
01361   }
01362 
01363   # remove any unconventional transcript associations involving this gene
01364 
01365   $sth =
01366     $self->prepare( "DELETE FROM unconventional_transcript_association "
01367                     . "WHERE gene_id = ? " );
01368   $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
01369   $sth->execute();
01370   $sth->finish();
01371 
01372   # remove this gene from the database
01373 
01374   $sth = $self->prepare("DELETE FROM gene WHERE gene_id = ? ");
01375   $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
01376   $sth->execute();
01377   $sth->finish();
01378 
01379   # unset the gene identifier and adaptor thereby flagging it as unstored
01380 
01381   $gene->dbID(undef);
01382   $gene->adaptor(undef);
01383 
01384   return;
01385 }
01386 
01387 
01388 =head2 get_Interpro_by_geneid
01389 
01390   Arg [1]    : String $gene_stable_id
01391                The stable ID of the gene to obtain
01392   Example    : @i = @{
01393                   $gene_adaptor->get_Interpro_by_geneid(
01394                     $gene->stable_id() ) };
01395   Description: Gets interpro accession numbers by gene stable id. A hack really
01396                - we should have a much more structured system than this.
01397   Returntype : listref of strings (Interpro_acc:description)
01398   Exceptions : none 
01399   Caller     : domainview
01400   Status     : Stable
01401 
01402 =cut
01403 
01404 sub get_Interpro_by_geneid {
01405   my ($self, $gene_stable_id) = @_;
01406  
01407   my $sql = qq(
01408   SELECT    i.interpro_ac,
01409             x.description
01410   FROM      transcript t,
01411             translation tl,
01412             protein_feature pf,
01413             interpro i,
01414             xref x,
01415             gene g
01416   WHERE     g.stable_id = ?
01417     AND     t.gene_id = g.gene_id
01418     AND     t.is_current = 1
01419     AND     tl.transcript_id = t.transcript_id
01420     AND     tl.translation_id = pf.translation_id
01421     AND     i.id = pf.hit_name
01422     AND     i.interpro_ac = x.dbprimary_acc);
01423 
01424   my $sth = $self->prepare($sql);
01425 
01426   $sth->bind_param( 1, $gene_stable_id, SQL_VARCHAR );
01427 
01428   $sth->execute;
01429 
01430   my @out;
01431   my %h;
01432   while( (my $arr = $sth->fetchrow_arrayref()) ) {
01433     if( $h{$arr->[0]} ) { next; }
01434     $h{$arr->[0]}=1;
01435     my $string = $arr->[0] .":".$arr->[1];
01436     push(@out,$string);
01437   }
01438 
01439   return \@out;
01440 }
01441 
01442 
01443 =head2 update
01444 
01445   Arg [1]    : Bio::EnsEMBL::Gene $gene
01446                The gene to update
01447   Example    : $gene_adaptor->update($gene);
01448   Description: Updates the type, analysis, display_xref, status, is_current and
01449                description of a gene in the database.
01450   Returntype : None
01451   Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene
01452   Caller     : general
01453   Status     : Stable
01454 
01455 =cut
01456 
01457 sub update {
01458   my ($self, $gene) = @_;
01459   my $update = 0;
01460 
01461   if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
01462     throw("Must update a gene object, not a $gene");
01463   }
01464 
01465   my $update_gene_sql = qq(
01466        UPDATE gene
01467           SET biotype = ?,
01468               analysis_id = ?,
01469               display_xref_id = ?,
01470               status = ?,
01471               description = ?,
01472               is_current = ?,
01473               canonical_transcript_id = ?,
01474               canonical_annotation = ?
01475         WHERE gene_id = ?
01476   );
01477 
01478   my $display_xref = $gene->display_xref();
01479   my $display_xref_id;
01480 
01481   if ( $display_xref && $display_xref->dbID() ) {
01482     $display_xref_id = $display_xref->dbID();
01483   } else {
01484     $display_xref_id = undef;
01485   }
01486 
01487   my $sth = $self->prepare( $update_gene_sql );
01488 
01489   $sth->bind_param( 1, $gene->biotype(),        SQL_VARCHAR );
01490   $sth->bind_param( 2, $gene->analysis->dbID(), SQL_INTEGER );
01491   $sth->bind_param( 3, $display_xref_id,        SQL_INTEGER );
01492   $sth->bind_param( 4, $gene->status(),         SQL_VARCHAR );
01493   $sth->bind_param( 5, $gene->description(),    SQL_VARCHAR );
01494   $sth->bind_param( 6, $gene->is_current(),     SQL_TINYINT );
01495 
01496   if ( defined( $gene->canonical_transcript() ) ) {
01497     $sth->bind_param( 7, $gene->canonical_transcript()->dbID(),
01498       SQL_INTEGER );
01499   } else {
01500     $sth->bind_param( 7, 0, SQL_INTEGER );
01501   }
01502 
01503   $sth->bind_param( 8, $gene->canonical_annotation(), SQL_VARCHAR );
01504   $sth->bind_param( 9, $gene->dbID(), SQL_INTEGER );
01505 
01506   $sth->execute();
01507 
01508   # maybe should update stable id ???
01509 }
01510 
01511 
01512 # _objs_from_sth
01513 
01514 #  Arg [1]    : StatementHandle $sth
01515 #  Arg [2]    : Bio::EnsEMBL::AssemblyMapper $mapper
01516 #  Arg [3]    : Bio::EnsEMBL::Slice $dest_slice
01517 #  Description: PROTECTED implementation of abstract superclass method.
01518 #               responsible for the creation of Genes
01519 #  Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system
01520 #  Exceptions : none
01521 #  Caller     : internal
01522 #  Status     : Stable
01523 
01524 sub _objs_from_sth {
01525   my ($self, $sth, $mapper, $dest_slice) = @_;
01526 
01527   #
01528   # This code is ugly because an attempt has been made to remove as many
01529   # function calls as possible for speed purposes.  Thus many caches and
01530   # a fair bit of gymnastics is used.
01531   #
01532 
01533   my $sa             = $self->db()->get_SliceAdaptor();
01534   my $aa             = $self->db()->get_AnalysisAdaptor();
01535   my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();
01536 
01537   my @genes;
01538   my %analysis_hash;
01539   my %slice_hash;
01540   my %sr_name_hash;
01541   my %sr_cs_hash;
01542 
01543   my (
01544     $gene_id,                 $seq_region_id,
01545     $seq_region_start,        $seq_region_end,
01546     $seq_region_strand,       $analysis_id,
01547     $biotype,                 $display_xref_id,
01548     $gene_description,        $status,
01549     $source,                  $is_current,
01550     $canonical_transcript_id, $canonical_annotation,
01551     $stable_id,               $version,
01552     $created_date,            $modified_date,
01553     $xref_display_id,         $xref_primary_acc,
01554     $xref_desc,               $xref_version,
01555     $external_db,             $external_status,
01556     $external_release,        $external_db_name,
01557     $info_type,               $info_text
01558   );
01559 
01560   $sth->bind_columns(
01561     \(
01562       $gene_id,                 $seq_region_id,
01563       $seq_region_start,        $seq_region_end,
01564       $seq_region_strand,       $analysis_id,
01565       $biotype,                 $display_xref_id,
01566       $gene_description,        $status,
01567       $source,                  $is_current,
01568       $canonical_transcript_id, $canonical_annotation,
01569       $stable_id,               $version,
01570       $created_date,            $modified_date,
01571       $xref_display_id,         $xref_primary_acc,
01572       $xref_desc,               $xref_version,
01573       $external_db,             $external_status,
01574       $external_release,        $external_db_name,
01575       $info_type,               $info_text
01576     ) );
01577 
01578   my $asm_cs;
01579   my $cmp_cs;
01580   my $asm_cs_vers;
01581   my $asm_cs_name;
01582   my $cmp_cs_vers;
01583   my $cmp_cs_name;
01584 
01585   if($mapper) {
01586     $asm_cs = $mapper->assembled_CoordSystem();
01587     $cmp_cs = $mapper->component_CoordSystem();
01588     $asm_cs_name = $asm_cs->name();
01589     $asm_cs_vers = $asm_cs->version();
01590     $cmp_cs_name = $cmp_cs->name();
01591     $cmp_cs_vers = $cmp_cs->version();
01592   }
01593 
01594   my $dest_slice_start;
01595   my $dest_slice_end;
01596   my $dest_slice_strand;
01597   my $dest_slice_length;
01598   my $dest_slice_sr_name;
01599   my $dest_slice_sr_id;
01600 
01601   if($dest_slice) {
01602     $dest_slice_start  = $dest_slice->start();
01603     $dest_slice_end    = $dest_slice->end();
01604     $dest_slice_strand = $dest_slice->strand();
01605     $dest_slice_length = $dest_slice->length();
01606     $dest_slice_sr_name = $dest_slice->seq_region_name();
01607     $dest_slice_sr_id = $dest_slice->get_seq_region_id();
01608   }
01609 
01610   FEATURE: while($sth->fetch()) {
01611     #get the analysis object
01612     my $analysis = $analysis_hash{$analysis_id} ||=
01613       $aa->fetch_by_dbID($analysis_id);
01614 
01615     #need to get the internal_seq_region, if present
01616     $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
01617     my $slice = $slice_hash{"ID:".$seq_region_id};
01618 
01619     if(!$slice) {
01620       $slice = $sa->fetch_by_seq_region_id($seq_region_id);
01621       $slice_hash{"ID:".$seq_region_id} = $slice;
01622       $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
01623       $sr_cs_hash{$seq_region_id} = $slice->coord_system();
01624     }
01625 
01626     my $sr_name = $sr_name_hash{$seq_region_id};
01627     my $sr_cs   = $sr_cs_hash{$seq_region_id};
01628 
01629     #
01630     # remap the feature coordinates to another coord system 
01631     # if a mapper was provided
01632     #
01633     if($mapper) {
01634 
01635       ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
01636         $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
01637              $seq_region_strand, $sr_cs);
01638 
01639       #skip features that map to gaps or coord system boundaries
01640       next FEATURE if(!defined($seq_region_id));
01641 
01642       #get a slice in the coord system we just mapped to
01643 #      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
01644         $slice = $slice_hash{"ID:".$seq_region_id} ||=
01645           $sa->fetch_by_seq_region_id($seq_region_id);
01646 #      } else {
01647 #        $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
01648 #          $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
01649 #                               $asm_cs_vers);
01650 #      }
01651     }
01652 
01653     #
01654     # If a destination slice was provided convert the coords.
01655     #
01656     if ( defined($dest_slice) ) {
01657       if ( $dest_slice_strand == 1 ) {
01658         # Positive strand.
01659 
01660         $seq_region_start = $seq_region_start - $dest_slice_start + 1;
01661         $seq_region_end   = $seq_region_end - $dest_slice_start + 1;
01662 
01663         if ( $dest_slice->is_circular() ) {
01664           # Handle cicular chromosomes.
01665 
01666           if ( $seq_region_start > $seq_region_end ) {
01667             # Looking at a feature overlapping the chromsome origin.
01668 
01669             if ( $seq_region_end > $dest_slice_start ) {
01670               # Looking at the region in the beginning of the
01671               # chromosome.
01672               $seq_region_start -= $dest_slice->seq_region_length();
01673             }
01674 
01675             if ( $seq_region_end < 0 ) {
01676               $seq_region_end += $dest_slice->seq_region_length();
01677             }
01678 
01679           } else {
01680 
01681             if (    $dest_slice_start > $dest_slice_end
01682                  && $seq_region_end < 0 )
01683             {
01684               # Looking at the region overlapping the chromosome
01685               # origin and a feature which is at the beginning of the
01686               # chromosome.
01687               $seq_region_start += $dest_slice->seq_region_length();
01688               $seq_region_end   += $dest_slice->seq_region_length();
01689             }
01690           }
01691 
01692         } ## end if ( $dest_slice->is_circular...)
01693 
01694       } else {
01695         # Negative strand.
01696 
01697         if (    $dest_slice->is_circular()
01698              && $seq_region_start > $seq_region_end )
01699         {
01700           # Handle cicular chromosomes.
01701 
01702           if ( $seq_region_end > $dest_slice_start ) {
01703             # Looking at the region in the beginning of the
01704             # chromosome.
01705             $seq_region_start = $dest_slice_end - $seq_region_end + 1;
01706             $seq_region_end =
01707               $seq_region_end -
01708               $dest_slice->seq_region_length -
01709               $dest_slice_start + 1;
01710           } else {
01711             my $tmp_seq_region_start = $seq_region_start;
01712             $seq_region_start =
01713               $dest_slice_end -
01714               $seq_region_end -
01715               $dest_slice->seq_region_length + 1;
01716             $seq_region_end =
01717               $dest_slice_end - $tmp_seq_region_start + 1;
01718           }
01719 
01720         } else {
01721           # Non-circular chromosome.
01722 
01723           my $tmp_seq_region_start = $seq_region_start;
01724           $seq_region_start = $dest_slice_end - $seq_region_end + 1;
01725           $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
01726         }
01727 
01728         $seq_region_strand = -$seq_region_strand;
01729 
01730       } ## end else [ if ( $dest_slice_strand...)]
01731 
01732       # Throw away features off the end of the requested slice or on
01733       # different seq_region.
01734 
01735       if (    $seq_region_end < 1
01736            || $seq_region_start > $dest_slice_length
01737            || ( $dest_slice_sr_id ne $seq_region_id ) )
01738       {
01739         next FEATURE;
01740       }
01741 
01742       $slice = $dest_slice;
01743     } ## end if ( defined($dest_slice...))
01744 
01745     my $display_xref;
01746 
01747     if ($display_xref_id) {
01748       $display_xref = Bio::EnsEMBL::DBEntry->new_fast( {
01749           'dbID'            => $display_xref_id,
01750           'adaptor'         => $dbEntryAdaptor,
01751           'display_id'      => $xref_display_id,
01752           'primary_id'      => $xref_primary_acc,
01753           'version'         => $xref_version,
01754           'description'     => $xref_desc,
01755           'release'         => $external_release,
01756           'dbname'          => $external_db,
01757           'db_display_name' => $external_db_name,
01758           'info_type'       => $info_type,
01759           'info_text'       => $info_text
01760       } );
01761       $display_xref->status($external_status);
01762     }
01763 
01764     # Finally, create the new Gene.
01765     push(
01766       @genes,
01767       $self->_create_feature_fast(
01768         'Bio::EnsEMBL::Gene',
01769         {
01770           'analysis'      => $analysis,
01771           'biotype'       => $biotype,
01772           'start'         => $seq_region_start,
01773           'end'           => $seq_region_end,
01774           'strand'        => $seq_region_strand,
01775           'adaptor'       => $self,
01776           'slice'         => $slice,
01777           'dbID'          => $gene_id,
01778           'stable_id'     => $stable_id,
01779           'version'       => $version,
01780           'created_date'  => $created_date || undef,
01781           'modified_date' => $modified_date || undef,
01782           'description'   => $gene_description,
01783           'external_name'   => undef,              # will use display_id
01784                                                    # from display_xref
01785           'external_db'     => $external_db,
01786           'external_status' => $external_status,
01787           'display_xref'    => $display_xref,
01788           'status'          => $status,
01789           'source'          => $source,
01790           'is_current'      => $is_current,
01791           'canonical_transcript_id' => $canonical_transcript_id,
01792           'canonical_annotation'    => $canonical_annotation
01793         } ) );
01794 
01795   }
01796 
01797   return \@genes;
01798 }
01799 
01800 
01801 =head2 cache_gene_seq_mappings
01802 
01803   Example    : $gene_adaptor->cache_gene_seq_mappings();
01804   Description: caches all the assembly mappings needed for genes
01805   Returntype : None
01806   Exceptions : None
01807   Caller     : general
01808   Status     : At Risk
01809              : New experimental code
01810 
01811 =cut
01812 
01813 sub cache_gene_seq_mappings {
01814   my ($self) = @_;
01815 
01816   # get the sequence level to map too
01817 
01818   my $sql =
01819       'SELECT name '
01820     . 'FROM coord_system '
01821     . 'WHERE attrib like "%%sequence_level%%"'
01822     . 'AND species_id = ?';
01823 
01824   my $sth = $self->prepare($sql);
01825   $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
01826   $sth->execute();
01827 
01828   my $sequence_level = $sth->fetchrow_array();
01829 
01830   $sth->finish();
01831 
01832   my $csa = $self->db->get_CoordSystemAdaptor();
01833   my $ama = $self->db->get_AssemblyMapperAdaptor();
01834 
01835   my $cs1 = $csa->fetch_by_name($sequence_level);
01836 
01837   # get level to map to two
01838 
01839   my $mcc   = $self->db->get_MetaCoordContainerAdaptor();
01840   my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene');
01841 
01842   foreach my $cs2 (@$csnew) {
01843     my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 );
01844     $am->register_all();
01845   }
01846 
01847 } ## end sub cache_gene_seq_mappings
01848 
01849 
01850 =head2 fetch_all_by_exon_supporting_evidence
01851 
01852   Arg [1]    : String $hit_name
01853                Name of supporting feature
01854   Arg [2]    : String $feature_type 
01855                one of "dna_align_feature" or "protein_align_feature"
01856   Arg [3]    : (optional) Bio::Ensembl::Analysis
01857   Example    : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence(
01858                   'XYZ', 'dna_align_feature');
01859   Description: Gets all the genes with transcripts with exons which have a
01860                specified hit on a particular type of feature. Optionally filter
01861                by analysis.
01862   Returntype : Listref of Bio::EnsEMBL::Gene
01863   Exceptions : If feature_type is not of correct type.
01864   Caller     : general
01865   Status     : Stable
01866 
01867 =cut
01868 
01869 sub fetch_all_by_exon_supporting_evidence {
01870   my ($self, $hit_name, $feature_type, $analysis) = @_;
01871 
01872   if ($feature_type !~ /(dna)|(protein)_align_feature/) {
01873     throw("feature type must be dna_align_feature or protein_align_feature");
01874   }
01875 
01876   my $anal_from = ", analysis a " if ($analysis);
01877   my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis);
01878 
01879   my $sql = qq(
01880       SELECT DISTINCT(g.gene_id)
01881         FROM gene g,
01882              transcript t,
01883              exon_transcript et,
01884              supporting_feature sf,
01885              $feature_type f
01886              $anal_from
01887        WHERE g.gene_id = t.gene_id
01888          AND g.is_current = 1
01889          AND t.transcript_id = et.transcript_id
01890          AND et.exon_id = sf.exon_id
01891          AND sf.feature_id = f.${feature_type}_id
01892          AND sf.feature_type = ?
01893          AND f.hit_name=?
01894          $anal_where
01895   );
01896 
01897   my $sth = $self->prepare($sql);
01898 
01899   $sth->bind_param(1, $feature_type, SQL_VARCHAR);
01900   $sth->bind_param(2, $hit_name, SQL_VARCHAR);
01901   $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);
01902 
01903   $sth->execute();
01904 
01905   my @genes;
01906 
01907   while ( my $id = $sth->fetchrow_array ) {
01908     my $gene = $self->fetch_by_dbID($id);
01909     push(@genes, $gene) if $gene;
01910   }
01911 
01912   return \@genes;
01913 }
01914 
01915 
01916 =head2 fetch_all_by_transcript_supporting_evidence
01917 
01918   Arg [1]    : String $hit_name
01919                Name of supporting feature
01920   Arg [2]    : String $feature_type 
01921                one of "dna_align_feature" or "protein_align_feature"
01922   Arg [3]    : (optional) Bio::Ensembl::Analysis
01923   Example    : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature');
01924   Description: Gets all the genes with transcripts with evidence for a
01925                specified hit on a particular type of feature. Optionally filter
01926                by analysis.
01927   Returntype : Listref of Bio::EnsEMBL::Gene.
01928   Exceptions : If feature_type is not of correct type.
01929   Caller     : general
01930   Status     : Stable
01931 
01932 =cut
01933 
01934 sub fetch_all_by_transcript_supporting_evidence {
01935   my ($self, $hit_name, $feature_type, $analysis) = @_;
01936 
01937   if($feature_type !~ /(dna)|(protein)_align_feature/) {
01938     throw("feature type must be dna_align_feature or protein_align_feature");
01939   }
01940 
01941   my $anal_from = ", analysis a " if ($analysis);
01942   my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis);
01943 
01944   my $sql = qq(
01945       SELECT DISTINCT(g.gene_id)
01946         FROM gene g,
01947              transcript t,
01948              transcript_supporting_feature sf,
01949              $feature_type f
01950              $anal_from
01951        WHERE g.gene_id = t.gene_id
01952          AND g.is_current = 1
01953          AND t.transcript_id = sf.transcript_id
01954          AND sf.feature_id = f.${feature_type}_id
01955          AND sf.feature_type = ?
01956          AND f.hit_name=?
01957          $anal_where
01958   );
01959 
01960   my $sth = $self->prepare($sql);
01961 
01962   $sth->bind_param(1, $feature_type, SQL_VARCHAR);
01963   $sth->bind_param(2, $hit_name, SQL_VARCHAR);
01964   $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);
01965 
01966   $sth->execute();
01967 
01968   my @genes;
01969 
01970   while( my $id = $sth->fetchrow_array ) {
01971     my $gene = $self->fetch_by_dbID($id);
01972     push(@genes, $gene) if $gene;
01973   }
01974 
01975   return \@genes;
01976 }
01977 
01978 =head2 fetch_nearest_Gene_by_Feature
01979 
01980   Arg [1]    : Feature object
01981   Example    : $genes = $gene_adaptor->fetch_nearest_Gene_by_Feature($feat);
01982   Description: Gets the nearest gene to the feature 
01983   Returntype : Listref of Bio::EnsEMBL::Gene, EMPTY list if no nearest
01984   Caller     : general
01985   Status     : UnStable
01986 
01987 =cut
01988 
01989 sub fetch_nearest_Gene_by_Feature{
01990   my $self = shift;
01991   my $feat = shift;
01992 
01993   my $stranded = shift; 
01994   my $stream = shift;  # 1 up stream -1 downstream
01995   my @genes;
01996 
01997 
01998   my $strand = $feat->strand;
01999   if(defined($stream) and !$strand){
02000     warn("stream specified but feature has no strand so +ve strand will be used");
02001     $strand = 1;
02002   }
02003   my $min_dist = 999;
02004   my $gene_id = 0;
02005 
02006   my $overlapping = $feat->get_overlapping_Genes();
02007 
02008   return $overlapping if(defined(@{$overlapping}[0]));
02009 
02010   my $seq_region_id = $feat->slice->adaptor->get_seq_region_id($feat->slice);
02011   my $start = ($feat->start + $feat->slice->start) -1;
02012   my $end   = ($feat->end   + $feat->slice->start) -1;
02013 
02014 
02015   my @gene_ids;
02016   if(!defined($stream) or $stream == 0){
02017 
02018     my $sql1 = "select g.gene_id, (? - g.seq_region_end)  as 'dist' from gene g where ";
02019     if($stranded){
02020       $sql1 .= "g.seq_region_strand = ".$strand." and ";
02021     }
02022     $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10";
02023     
02024     #
02025     # MAYBE set the result of prepare to be static in case lots of calls.
02026     #
02027     my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
02028     $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
02029     $sql1_sth->bind_columns(\$gene_id, \$min_dist)     || die "Could mot bin columns";
02030  
02031     my $last_dist = 99999999999999999;
02032     while($sql1_sth->fetch()){
02033       if($min_dist <= $last_dist){
02034     push @gene_ids, $gene_id;
02035     $last_dist = $min_dist;
02036       }
02037     }   
02038     $sql1_sth->finish();
02039     
02040 
02041 
02042     my $sql2 = "select g.gene_id, (g.seq_region_start - ?)  as 'dist' from gene g  where ";
02043     if($stranded){
02044       $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
02045     }
02046     $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10";
02047 
02048     my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
02049     
02050     my ($tmp_min_dist, $tmp_gene_id);
02051     $sql2_sth->execute($end, $seq_region_id, $end)             || die "Could not execute sql";
02052     $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist)     || die "Could mot bin columns";
02053     my $first =1;
02054     while($sql2_sth->fetch()){
02055       if( $tmp_min_dist <= $last_dist){
02056     if($first){
02057       $first = 0;
02058       if($tmp_min_dist < $last_dist){
02059         @gene_ids = (); #reset
02060       }
02061     }   
02062     push @gene_ids, $tmp_gene_id;
02063     $last_dist = $tmp_min_dist;
02064       }
02065     }
02066     $sql2_sth->finish();
02067 
02068 
02069   } 
02070   elsif(($stream*$strand) == 1){
02071     my $sql1 = "select g.gene_id, (? - g.seq_region_end)  as 'dist' from gene g where ";
02072     if($stranded){
02073       $sql1 .= "g.seq_region_strand = ".$strand." and ";
02074     }
02075     $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10";
02076     
02077     #
02078     # MAYBE set the result of prepare to be static in case lots of calls.
02079     #
02080     my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
02081     $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
02082     $sql1_sth->bind_columns(\$gene_id, \$min_dist)     || die "Could mot bin columns";
02083     
02084     my $last_dist;
02085     my $first = 1;
02086     while($sql1_sth->fetch()){
02087       if($first){
02088     $first = 0;
02089       }
02090       else{
02091     next if ($min_dist > $last_dist);
02092       } 
02093       push @gene_ids, $gene_id;
02094       $last_dist = $min_dist;
02095     }   
02096     $sql1_sth->finish();
02097   } 
02098   elsif(($stream * $strand) == -1){
02099 
02100     my $sql2 = "select g.gene_id, (g.seq_region_start - ?)  as 'dist' from gene g  where ";
02101     if($stranded){
02102       $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
02103     }
02104     $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10";
02105     
02106     my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
02107     
02108     my ($tmp_min_dist, $tmp_gene_id);
02109     $sql2_sth->execute($end, $seq_region_id, $end)             || die "Could not execute sql";
02110     $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist)     || die "Could mot bin columns";
02111     my $first =1;
02112     my $last_dist;
02113     while($sql2_sth->fetch()){
02114       if($first){
02115     $first = 0; 
02116       } 
02117       else{
02118     next if ($tmp_min_dist > $last_dist);
02119       }
02120       push @gene_ids, $tmp_gene_id;
02121       $last_dist = $tmp_min_dist;
02122     }
02123     $sql2_sth->finish();
02124   }
02125   else{
02126     die "Invalid stream or strand must be -1, 0 or 1\n";
02127   }
02128 
02129 
02130 
02131   foreach my $gene_id (@gene_ids){
02132     push @genes, $self->fetch_by_dbID($gene_id);
02133   }
02134   return \@genes;
02135 
02136 }
02137 
02138 ##########################
02139 #                        #
02140 #  DEPRECATED METHODS    #
02141 #                        #
02142 ##########################
02143 
02144 
02145 =head2 fetch_by_maximum_DBLink
02146 
02147  Description: DEPRECATED - use fetch_all_by_external_name instead
02148 
02149 =cut
02150 
02151 sub fetch_by_maximum_DBLink {
02152   my ($self, $external_id) = @_;
02153   
02154   deprecate( "use fetch_all_by_external_name instead" );
02155 
02156   my $genes=$self->fetch_all_by_external_name($external_id);
02157   
02158   my $biggest;
02159   my $max = 0;
02160   my $size = scalar(@$genes);
02161   if ($size > 0) {
02162     foreach my $gene (@$genes) {
02163       my $size = scalar(@{$gene->get_all_Exons});
02164       if ($size > $max) {
02165     $biggest = $gene;
02166     $max = $size;
02167       }
02168     }
02169     return $biggest;
02170   }
02171   return;
02172 }
02173 
02174 
02175 =head2 get_display_xref
02176 
02177   Description: DEPRECATED use $gene->display_xref
02178 
02179 =cut
02180 
02181 sub get_display_xref {
02182   my ($self, $gene) = @_;
02183 
02184   deprecate( "display xref should retrieved from Gene object directly" );
02185 
02186   if ( !defined $gene ) {
02187     throw("Must call with a Gene object");
02188   }
02189 
02190   my $sth = $self->prepare(qq(
02191       SELECT e.db_name,
02192              x.display_label,
02193              x.xref_id
02194       FROM   gene g, 
02195              xref x, 
02196              external_db e
02197       WHERE  g.gene_id = ?
02198         AND  g.display_xref_id = x.xref_id
02199         AND  x.external_db_id = e.external_db_id
02200   ));
02201 
02202   $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
02203   $sth->execute();
02204 
02205   my ($db_name, $display_label, $xref_id) = $sth->fetchrow_array();
02206   if ( !defined $xref_id ) {
02207     return undef;
02208   }
02209   
02210   my $db_entry = Bio::EnsEMBL::DBEntry->new(
02211      -dbid => $xref_id,
02212      -adaptor => $self->db->get_DBEntryAdaptor(),
02213      -dbname => $db_name,
02214      -display_id => $display_label
02215   );
02216 
02217   return $db_entry;
02218 }
02219 
02220 
02221 =head2 get_description
02222 
02223   Description: DEPRECATED, use gene->get_description
02224 
02225 =cut
02226 
02227 sub get_description {
02228   my ($self, $dbID) = @_;
02229 
02230   deprecate( "Gene description should be loaded on gene retrieval. Use gene->get_description()" );
02231 
02232   if ( !defined $dbID ) {
02233     throw("must call with dbID");
02234   }
02235 
02236   my $sth = $self->prepare("SELECT description 
02237                             FROM   gene_description 
02238                             WHERE  gene_id = ?");
02239   
02240   $sth->bind_param(1, $dbID, SQL_INTEGER);
02241   $sth->execute();
02242 
02243   my @array = $sth->fetchrow_array();
02244   return $array[0];
02245 }
02246 
02247 
02248 =head2 fetch_by_Peptide_id
02249 
02250   Description: DEPRECATED, use fetch_by_translation_stable_id()
02251 
02252 =cut
02253 
02254 sub fetch_by_Peptide_id {
02255   my ( $self, $translation_stable_id) = @_;
02256 
02257   deprecate( "Please use better named fetch_by_translation_stable_id \n".
02258     caller(2) );
02259 
02260   $self->fetch_by_translation_stable_id($translation_stable_id);
02261 }
02262 
02263 
02264 =head2 get_stable_entry_info
02265 
02266   Description: DEPRECATED use $gene->stable_id instead
02267 
02268 =cut
02269 
02270 sub get_stable_entry_info {
02271   my ($self,$gene) = @_;
02272 
02273   deprecated("stable id info is loaded on default, no lazy loading necessary");
02274 
02275   if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
02276     throw("Needs a gene object, not a $gene");
02277   }
02278 
02279   my $created_date = $self->db->dbc->from_date_to_seconds("created_date");
02280   my $modified_date = $self->db->dbc->from_date_to_seconds("modified_date");
02281 
02282   my $sth =
02283     $self->prepare(   "SELECT stable_id, "
02284                     . $created_date . ","
02285                     . $modified_date
02286                     . ", version FROM gene WHERE gene_id = ?" );
02287 
02288   $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
02289   $sth->execute();
02290 
02291   my @array = $sth->fetchrow_array();
02292   $gene->{'stable_id'} = $array[0];
02293   $gene->{'created'}   = $array[1];
02294   $gene->{'modified'}  = $array[2];
02295   $gene->{'version'}   = $array[3];
02296 
02297   return 1;
02298 }
02299 
02300 
02301 =head2 fetch_all_by_DBEntry
02302 
02303   Description: DEPRECATED - Use fetch_all_by_external_name instead
02304 
02305 =cut
02306 
02307 sub fetch_all_by_DBEntry {
02308   my $self = shift;
02309   
02310   deprecate('Use fetch_all_by_external_name instead.');
02311   
02312   return $self->fetch_all_by_external_name(@_);
02313 }
02314 
02315 
02316 1;
02317 
02318