GeneAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE 00002 00003 Copyright (c) 1999-2012 The European Bioinformatics Institute and 00004 Genome Research Limited. All rights reserved. 00005 00006 This software is distributed under a modified Apache license. 00007 For license details, please see 00008 00009 http://www.ensembl.org/info/about/code_licence.html 00010 00011 =head1 CONTACT 00012 00013 Please email comments or questions to the public Ensembl 00014 developers list at <dev@ensembl.org>. 00015 00016 Questions may also be sent to the Ensembl help desk at 00017 <helpdesk@ensembl.org>. 00018 00019 =cut 00020 00021 =head1 NAME 00022 00023 Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and 00024 storage of Gene objects 00025 00026 =head1 SYNOPSIS 00027 00028 use Bio::EnsEMBL::Registry; 00029 00030 Bio::EnsEMBL::Registry->load_registry_from_db( 00031 -host => 'ensembldb.ensembl.org', 00032 -user => 'anonymous', 00033 ); 00034 00035 $gene_adaptor = 00036 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "gene" ); 00037 00038 $gene = $gene_adaptor->fetch_by_dbID(1234); 00039 00040 $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000184129'); 00041 00042 @genes = @{ $gene_adaptor->fetch_all_by_external_name('BRCA2') }; 00043 00044 $slice_adaptor = 00045 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" ); 00046 00047 $slice = 00048 $slice_adaptor->fetch_by_region( 'chromosome', '1', 1, 1000000 ); 00049 00050 @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) }; 00051 00052 =head1 DESCRIPTION 00053 00054 This is a database aware adaptor for the retrieval and storage of gene 00055 objects. 00056 00057 =head1 METHODS 00058 00059 =cut 00060 00061 package Bio::EnsEMBL::DBSQL::GeneAdaptor; 00062 00063 use strict; 00064 00065 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); 00066 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref ); 00067 use Bio::EnsEMBL::DBSQL::SliceAdaptor; 00068 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; 00069 use Bio::EnsEMBL::DBSQL::DBAdaptor; 00070 use Bio::EnsEMBL::Gene; 00071 00072 use vars '@ISA'; 00073 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor); 00074 00075 00076 # _tables 00077 # Arg [1] : none 00078 # Description: PROTECTED implementation of superclass abstract method. 00079 # Returns the names, aliases of the tables to use for queries. 00080 # Returntype : list of listrefs of strings 00081 # Exceptions : none 00082 # Caller : internal 00083 # Status : Stable 00084 00085 sub _tables { 00086 return ( 00087 [ 'gene', 'g' ], 00088 [ 'xref', 'x' ], 00089 [ 'external_db', 'exdb' ] ); 00090 } 00091 00092 00093 # _columns 00094 # Arg [1] : none 00095 # Example : none 00096 # Description: PROTECTED implementation of superclass abstract method. 00097 # Returns a list of columns to use for queries. 00098 # Returntype : list of strings 00099 # Exceptions : none 00100 # Caller : internal 00101 # Status : Stable 00102 00103 sub _columns { 00104 my ($self) = @_; 00105 00106 my $created_date = 00107 $self->db()->dbc()->from_date_to_seconds("g.created_date"); 00108 my $modified_date = 00109 $self->db()->dbc()->from_date_to_seconds("g.modified_date"); 00110 00111 return ( 00112 'g.gene_id', 'g.seq_region_id', 00113 'g.seq_region_start', 'g.seq_region_end', 00114 'g.seq_region_strand', 'g.analysis_id', 00115 'g.biotype', 'g.display_xref_id', 00116 'g.description', 'g.status', 00117 'g.source', 'g.is_current', 00118 'g.canonical_transcript_id', 'g.canonical_annotation', 00119 'g.stable_id', 'g.version', 00120 $created_date, $modified_date, 00121 'x.display_label', 'x.dbprimary_acc', 00122 'x.description', 'x.version', 00123 'exdb.db_name', 'exdb.status', 00124 'exdb.db_release', 'exdb.db_display_name', 00125 'x.info_type', 'x.info_text' 00126 ); 00127 } ## end sub _columns 00128 00129 00130 sub _left_join { 00131 return ( 00132 [ 'xref', "x.xref_id = g.display_xref_id" ], 00133 [ 'external_db', "exdb.external_db_id = x.external_db_id" ] ); 00134 } 00135 00136 00137 =head2 list_dbIDs 00138 00139 Example : @gene_ids = @{$gene_adaptor->list_dbIDs()}; 00140 Description: Gets an array of internal ids for all genes in the current db 00141 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. 00142 Returntype : Listref of Ints 00143 Exceptions : none 00144 Caller : general 00145 Status : Stable 00146 00147 =cut 00148 00149 sub list_dbIDs { 00150 my ($self,$ordered) = @_; 00151 00152 return $self->_list_dbIDs("gene",undef, $ordered); 00153 } 00154 00155 00156 =head2 list_stable_ids 00157 00158 Example : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()}; 00159 Description: Gets an listref of stable ids for all genes in the current db 00160 Returntype : reference to a list of strings 00161 Exceptions : none 00162 Caller : general 00163 Status : Stable 00164 00165 =cut 00166 00167 sub list_stable_ids { 00168 my ($self) = @_; 00169 00170 return $self->_list_dbIDs("gene", "stable_id"); 00171 } 00172 00173 00174 sub list_seq_region_ids { 00175 my $self = shift; 00176 00177 return $self->_list_seq_region_ids('gene'); 00178 } 00179 00180 =head2 fetch_by_display_label 00181 00182 Arg [1] : String $label - display label of gene to fetch 00183 Example : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2"); 00184 Description: Returns the gene which has the given display label or undef if 00185 there is none. If there are more than 1, only the first is 00186 reported. 00187 Returntype : Bio::EnsEMBL::Gene 00188 Exceptions : none 00189 Caller : general 00190 Status : Stable 00191 00192 =cut 00193 00194 sub fetch_by_display_label { 00195 my $self = shift; 00196 my $label = shift; 00197 00198 my $constraint = "x.display_label = ? AND g.is_current = 1"; 00199 $self->bind_param_generic_fetch($label,SQL_VARCHAR); 00200 my ($gene) = @{ $self->generic_fetch($constraint) }; 00201 00202 return $gene; 00203 } 00204 00205 00206 00207 =head2 fetch_by_stable_id 00208 00209 Arg [1] : String $id 00210 The stable ID of the gene to retrieve 00211 Example : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944'); 00212 Description: Retrieves a gene object from the database via its stable id. 00213 The gene will be retrieved in its native coordinate system (i.e. 00214 in the coordinate system it is stored in the database). It may 00215 be converted to a different coordinate system through a call to 00216 transform() or transfer(). If the gene or exon is not found 00217 undef is returned instead. 00218 Returntype : Bio::EnsEMBL::Gene or undef 00219 Exceptions : if we cant get the gene in given coord system 00220 Caller : general 00221 Status : Stable 00222 00223 =cut 00224 00225 sub fetch_by_stable_id { 00226 my ($self, $stable_id) = @_; 00227 00228 my $constraint = "g.stable_id = ? AND g.is_current = 1"; 00229 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR); 00230 my ($gene) = @{ $self->generic_fetch($constraint) }; 00231 00232 return $gene; 00233 } 00234 00235 00236 00237 =head2 fetch_all_by_biotype 00238 00239 Arg [1] : String $biotype 00240 listref of $biotypes 00241 The biotype of the gene to retrieve. You can have as an argument a reference 00242 to a list of biotypes 00243 Example : $gene = $gene_adaptor->fetch_all_by_biotype('protein_coding'); 00244 $gene = $gene_adaptor->fetch_all_by_biotypes(['protein_coding', 'sRNA', 'miRNA']); 00245 Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes. 00246 The genes will be retrieved in its native coordinate system (i.e. 00247 in the coordinate system it is stored in the database). It may 00248 be converted to a different coordinate system through a call to 00249 transform() or transfer(). If the gene or exon is not found 00250 undef is returned instead. 00251 Returntype : listref of Bio::EnsEMBL::Gene 00252 Exceptions : if we cant get the gene in given coord system 00253 Caller : general 00254 Status : Stable 00255 00256 =cut 00257 00258 sub fetch_all_by_biotype { 00259 my ($self, $biotype) = @_; 00260 00261 if (!defined $biotype){ 00262 throw("Biotype or listref of biotypes expected"); 00263 } 00264 my $constraint; 00265 if (ref($biotype) eq 'ARRAY'){ 00266 $constraint = "g.biotype IN ("; 00267 foreach my $b (@{$biotype}){ 00268 $constraint .= "?,"; 00269 $self->bind_param_generic_fetch($b,SQL_VARCHAR); 00270 } 00271 chop($constraint); #remove last , from expression 00272 $constraint .= ") and g.is_current = 1"; 00273 00274 } 00275 else{ 00276 $constraint = "g.biotype = ? and g.is_current = 1"; 00277 $self->bind_param_generic_fetch($biotype,SQL_VARCHAR); 00278 } 00279 my @genes = @{ $self->generic_fetch($constraint) }; 00280 return \@genes ; 00281 } 00282 00283 00284 sub fetch_all { 00285 my ($self) = @_; 00286 00287 my $constraint = 'g.biotype != "LRG_gene" and g.is_current = 1'; 00288 my @genes = @{ $self->generic_fetch($constraint) }; 00289 return \@genes ; 00290 } 00291 00292 00293 =head2 fetch_all_versions_by_stable_id 00294 00295 Arg [1] : String $stable_id 00296 The stable ID of the gene to retrieve 00297 Example : $gene = $gene_adaptor->fetch_all_versions_by_stable_id 00298 ('ENSG00000148944'); 00299 Description : Similar to fetch_by_stable_id, but retrieves all versions of a 00300 gene stored in the database. 00301 Returntype : listref of Bio::EnsEMBL::Gene 00302 Exceptions : if we cant get the gene in given coord system 00303 Caller : general 00304 Status : At Risk 00305 00306 =cut 00307 00308 sub fetch_all_versions_by_stable_id { 00309 my ($self, $stable_id) = @_; 00310 00311 my $constraint = "g.stable_id = ?"; 00312 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR); 00313 return $self->generic_fetch($constraint); 00314 } 00315 00316 00317 =head2 fetch_by_exon_stable_id 00318 00319 Arg [1] : String $id 00320 The stable id of an exon of the gene to retrieve 00321 Example : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944'); 00322 Description: Retrieves a gene object from the database via an exon stable id. 00323 The gene will be retrieved in its native coordinate system (i.e. 00324 in the coordinate system it is stored in the database). It may 00325 be converted to a different coordinate system through a call to 00326 transform() or transfer(). If the gene or exon is not found 00327 undef is returned instead. 00328 Returntype : Bio::EnsEMBL::Gene or undef 00329 Exceptions : none 00330 Caller : general 00331 Status : Stable 00332 00333 =cut 00334 00335 sub fetch_by_exon_stable_id { 00336 my ($self, $stable_id, $version) = @_; 00337 00338 my $sql = qq( 00339 SELECT t.gene_id 00340 FROM transcript as t, 00341 exon_transcript as et, 00342 exon as e 00343 WHERE t.transcript_id = et.transcript_id 00344 AND et.exon_id = e.exon_id 00345 AND e.stable_id = ? 00346 AND e.is_current = 1 00347 ); 00348 00349 my $sth = $self->prepare($sql); 00350 $sth->bind_param(1, $stable_id, SQL_VARCHAR); 00351 $sth->execute(); 00352 00353 my ($dbID) = $sth->fetchrow_array(); 00354 00355 return undef if(!defined($dbID)); 00356 00357 my $gene = $self->fetch_by_dbID($dbID); 00358 00359 return $gene; 00360 } 00361 00362 00363 =head2 fetch_all_by_domain 00364 00365 Arg [1] : String $domain 00366 The domain to fetch genes from 00367 Example : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) }; 00368 Description: Retrieves a listref of genes whose translation contain interpro 00369 domain $domain. The genes are returned in their native coord 00370 system (i.e. the coord_system they are stored in). If the coord 00371 system needs to be changed, then tranform or transfer should be 00372 called on the individual objects returned. 00373 Returntype : list of Bio::EnsEMBL::Genes 00374 Exceptions : none 00375 Caller : domainview 00376 Status : Stable 00377 00378 =cut 00379 00380 sub fetch_all_by_domain { 00381 my ($self, $domain) = @_; 00382 00383 throw("domain argument is required") unless ($domain); 00384 00385 my $sth = $self->prepare(qq( 00386 SELECT tr.gene_id 00387 FROM interpro i, 00388 protein_feature pf, 00389 transcript tr, 00390 translation tl, 00391 seq_region sr, 00392 coord_system cs 00393 WHERE cs.species_id = ? 00394 AND cs.coord_system_id = sr.coord_system_id 00395 AND sr.seq_region_id = tr.seq_region_id 00396 AND tr.is_current = 1 00397 AND tr.transcript_id = tl.transcript_id 00398 AND tl.translation_id = pf.translation_id 00399 AND pf.hit_name = i.id 00400 AND i.interpro_ac = ? 00401 GROUP BY tr.gene_id)); 00402 00403 $sth->bind_param( 1, $self->species_id(), SQL_VARCHAR ); 00404 $sth->bind_param( 2, $domain, SQL_VARCHAR ); 00405 00406 $sth->execute(); 00407 00408 my @array = @{$sth->fetchall_arrayref()}; 00409 $sth->finish(); 00410 00411 my @gene_ids = map {$_->[0]} @array; 00412 00413 return $self->fetch_all_by_dbID_list(\@gene_ids); 00414 } 00415 00416 00417 00418 =head2 fetch_all_by_Slice_and_external_dbname_link 00419 00420 Arg [1] : Bio::EnsEMBL::Slice $slice 00421 The slice to fetch genes on. 00422 Arg [2] : (optional) string $logic_name 00423 the logic name of the type of features to obtain 00424 Arg [3] : (optional) boolean $load_transcripts 00425 if true, transcripts will be loaded immediately 00426 rather than lazy loaded later. 00427 Arg [4] : Name of the external database 00428 Example : @genes = @{ 00429 $ga->fetch_all_by_Slice_and_external_dbname_link( 00430 $slice, undef, undef, "HUGO" ) }; 00431 Description: Overrides superclass method to optionally load 00432 transcripts immediately rather than lazy-loading them 00433 later. This is more efficient when there are a lot 00434 of genes whose transcripts are going to be used. The 00435 genes are then filtered to return only those with 00436 external database links of the type specified 00437 Returntype : reference to list of genes 00438 Exceptions : thrown if exon cannot be placed on transcript slice 00439 Caller : 00440 Status : Stable 00441 00442 =cut 00443 00444 sub fetch_all_by_Slice_and_external_dbname_link { 00445 my ( $self, $slice, $logic_name, $load_transcripts, $db_name ) = @_; 00446 00447 # Get the external_db_id(s) from the name. 00448 my $sth = $self->prepare( 00449 "SELECT external_db_id FROM external_db WHERE db_name = ?"); 00450 00451 $sth->bind_param( 1, $db_name, SQL_VARCHAR ); 00452 $sth->execute(); 00453 00454 my $external_db_id; 00455 $sth->bind_columns( \$external_db_id ); 00456 00457 my @external_db_ids; 00458 while ( $sth->fetch() ) { 00459 push( @external_db_ids, $external_db_id ); 00460 } 00461 00462 if ( scalar(@external_db_ids) == 0 ) { 00463 warn sprintf( "Could not find external database " 00464 . "'%s' in the external_db table\n" 00465 . "Available are:\n", 00466 $db_name ); 00467 00468 $sth = $self->prepare("SELECT DISTINCT db_name FROM external_db"); 00469 00470 $sth->execute(); 00471 $sth->bind_columns( \$external_db_id ); 00472 00473 while ( $sth->fetch() ) { 00474 warn "\t$external_db_id\n"; 00475 } 00476 return []; 00477 } 00478 00479 # Get the gene_ids for those with links. 00480 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor(); 00481 00482 my %linked_genes; 00483 foreach $external_db_id (@external_db_ids) { 00484 my @linked_genes = 00485 $dbe_adaptor->list_gene_ids_by_external_db_id($external_db_id); 00486 00487 foreach my $gene_id (@linked_genes) { 00488 $linked_genes{$gene_id} = 1; 00489 } 00490 } 00491 00492 # Get all the genes on the slice. 00493 my $genes = $self->SUPER::fetch_all_by_Slice_constraint( $slice, 00494 'g.is_current = 1', $logic_name ); 00495 00496 # Create a list of those that are in the gene_ids list. 00497 my @genes_passed; 00498 foreach my $gene (@$genes) { 00499 if ( exists( $linked_genes{ $gene->dbID() } ) ) { 00500 push( @genes_passed, $gene ); 00501 } 00502 } 00503 00504 # Return the list of those that passed. 00505 return \@genes_passed; 00506 } ## end sub fetch_all_by_Slice_and_external_dbname_link 00507 00508 =head2 fetch_all_by_Slice 00509 00510 Arg [1] : Bio::EnsEMBL::Slice $slice 00511 The slice to fetch genes on. 00512 Arg [2] : (optional) string $logic_name 00513 the logic name of the type of features to obtain 00514 Arg [3] : (optional) boolean $load_transcripts 00515 if true, transcripts will be loaded immediately rather than 00516 lazy loaded later. 00517 Arg [4] : (optional) string $source 00518 the source name of the features to obtain. 00519 Arg [5] : (optional) string biotype 00520 the biotype of the features to obtain. 00521 Example : @genes = @{$gene_adaptor->fetch_all_by_Slice()}; 00522 Description: Overrides superclass method to optionally load transcripts 00523 immediately rather than lazy-loading them later. This 00524 is more efficient when there are a lot of genes whose 00525 transcripts are going to be used. 00526 Returntype : reference to list of genes 00527 Exceptions : thrown if exon cannot be placed on transcript slice 00528 Caller : Slice::get_all_Genes 00529 Status : Stable 00530 00531 =cut 00532 00533 sub fetch_all_by_Slice { 00534 my ( $self, $slice, $logic_name, $load_transcripts, $source, 00535 $biotype ) = @_; 00536 00537 my $constraint = 'g.is_current = 1'; 00538 00539 if ( defined($source) ) { 00540 $constraint .= " and g.source = '$source'"; 00541 } 00542 if ( defined($biotype) ) { 00543 $constraint .= " and g.biotype = '$biotype'"; 00544 } 00545 00546 my $genes = 00547 $self->SUPER::fetch_all_by_Slice_constraint( $slice, $constraint, 00548 $logic_name ); 00549 00550 # If there are less than two genes, still do lazy-loading. 00551 if ( !$load_transcripts || @$genes < 2 ) { 00552 return $genes; 00553 } 00554 00555 # Preload all of the transcripts now, instead of lazy loading later, 00556 # faster than one query per transcript. 00557 00558 # First check if transcripts are already preloaded. 00559 # FIXME: Should check all transcripts. 00560 if ( exists( $genes->[0]->{'_transcript_array'} ) ) { 00561 return $genes; 00562 } 00563 00564 # Get extent of region spanned by transcripts. 00565 my ( $min_start, $max_end ); 00566 foreach my $g (@$genes) { 00567 if ( !defined($min_start) || $g->seq_region_start() < $min_start ) { 00568 $min_start = $g->seq_region_start(); 00569 } 00570 if ( !defined($max_end) || $g->seq_region_end() > $max_end ) { 00571 $max_end = $g->seq_region_end(); 00572 } 00573 } 00574 00575 my $ext_slice; 00576 00577 if ( $min_start >= $slice->start() && $max_end <= $slice->end() ) { 00578 $ext_slice = $slice; 00579 } else { 00580 my $sa = $self->db()->get_SliceAdaptor(); 00581 $ext_slice = $sa->fetch_by_region( 00582 $slice->coord_system->name(), $slice->seq_region_name(), 00583 $min_start, $max_end, 00584 $slice->strand(), $slice->coord_system->version() ); 00585 } 00586 00587 # Associate transcript identifiers with genes. 00588 00589 my %g_hash = map { $_->dbID => $_ } @{$genes}; 00590 00591 my $g_id_str = join( ',', keys(%g_hash) ); 00592 00593 my $sth = 00594 $self->prepare( "SELECT gene_id, transcript_id " 00595 . "FROM transcript " 00596 . "WHERE gene_id IN ($g_id_str)" ); 00597 00598 $sth->execute(); 00599 00600 my ( $g_id, $tr_id ); 00601 $sth->bind_columns( \( $g_id, $tr_id ) ); 00602 00603 my %tr_g_hash; 00604 00605 while ( $sth->fetch() ) { 00606 $tr_g_hash{$tr_id} = $g_hash{$g_id}; 00607 } 00608 00609 my $ta = $self->db()->get_TranscriptAdaptor(); 00610 my $transcripts = $ta->fetch_all_by_Slice( 00611 $ext_slice, 00612 1, undef, 00613 sprintf( "t.transcript_id IN (%s)", 00614 join( ',', sort { $a <=> $b } keys(%tr_g_hash) ) ) ); 00615 00616 # Move transcripts onto gene slice, and add them to genes. 00617 foreach my $tr ( @{$transcripts} ) { 00618 if ( !exists( $tr_g_hash{ $tr->dbID() } ) ) { next } 00619 00620 my $new_tr; 00621 if ( $slice != $ext_slice ) { 00622 $new_tr = $tr->transfer($slice); 00623 if ( !defined($new_tr) ) { 00624 throw("Unexpected. " 00625 . "Transcript could not be transfered onto Gene slice." ); 00626 } 00627 } else { 00628 $new_tr = $tr; 00629 } 00630 00631 $tr_g_hash{ $tr->dbID() }->add_Transcript($new_tr); 00632 } 00633 00634 return $genes; 00635 } ## end sub fetch_all_by_Slice 00636 00637 =head2 fetch_by_transcript_id 00638 00639 Arg [1] : Int $trans_id 00640 Unique database identifier for the transcript whose gene should 00641 be retrieved. The gene is returned in its native coord 00642 system (i.e. the coord_system it is stored in). If the coord 00643 system needs to be changed, then tranform or transfer should 00644 be called on the returned object. undef is returned if the 00645 gene or transcript is not found in the database. 00646 Example : $gene = $gene_adaptor->fetch_by_transcript_id(1241); 00647 Description: Retrieves a gene from the database via the database identifier 00648 of one of its transcripts. 00649 Returntype : Bio::EnsEMBL::Gene 00650 Exceptions : none 00651 Caller : general 00652 Status : Stable 00653 00654 =cut 00655 00656 sub fetch_by_transcript_id { 00657 my ($self, $trans_id) = @_; 00658 00659 # this is a cheap SQL call 00660 my $sth = $self->prepare(qq( 00661 SELECT tr.gene_id 00662 FROM transcript tr 00663 WHERE tr.transcript_id = ? 00664 )); 00665 00666 $sth->bind_param(1, $trans_id, SQL_INTEGER); 00667 $sth->execute(); 00668 00669 my ($geneid) = $sth->fetchrow_array(); 00670 00671 $sth->finish(); 00672 00673 return undef if( !defined $geneid ); 00674 00675 my $gene = $self->fetch_by_dbID($geneid); 00676 return $gene; 00677 } 00678 00679 00680 =head2 fetch_by_transcript_stable_id 00681 00682 Arg [1] : string $trans_stable_id 00683 transcript stable ID whose gene should be retrieved 00684 Example : my $gene = $gene_adaptor->fetch_by_transcript_stable_id 00685 ('ENST0000234'); 00686 Description: Retrieves a gene from the database via the stable ID of one of 00687 its transcripts 00688 Returntype : Bio::EnsEMBL::Gene 00689 Exceptions : none 00690 Caller : general 00691 Status : Stable 00692 00693 =cut 00694 00695 sub fetch_by_transcript_stable_id { 00696 my ($self, $trans_stable_id) = @_; 00697 00698 my $sth = $self->prepare(qq( 00699 SELECT gene_id 00700 FROM transcript 00701 WHERE stable_id = ? 00702 AND is_current = 1 00703 )); 00704 00705 $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR); 00706 $sth->execute(); 00707 00708 my ($geneid) = $sth->fetchrow_array(); 00709 $sth->finish; 00710 00711 return undef if (!defined $geneid); 00712 00713 my $gene = $self->fetch_by_dbID($geneid); 00714 return $gene; 00715 } 00716 00717 00718 =head2 fetch_by_translation_stable_id 00719 00720 Arg [1] : String $translation_stable_id 00721 The stable id of a translation of the gene to be obtained 00722 Example : my $gene = $gene_adaptor->fetch_by_translation_stable_id 00723 ('ENSP00000278194'); 00724 Description: Retrieves a gene via the stable id of one of its translations. 00725 Returntype : Bio::EnsEMBL::Gene 00726 Exceptions : none 00727 Caller : general 00728 Status : Stable 00729 00730 =cut 00731 00732 sub fetch_by_translation_stable_id { 00733 my ($self, $translation_stable_id) = @_; 00734 00735 my $sth = $self->prepare(qq( 00736 SELECT tr.gene_id 00737 FROM transcript tr, 00738 translation tl 00739 WHERE tl.stable_id = ? 00740 AND tr.transcript_id = tl.transcript_id 00741 AND tr.is_current = 1 00742 )); 00743 00744 $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR); 00745 $sth->execute(); 00746 00747 my ($geneid) = $sth->fetchrow_array(); 00748 $sth->finish; 00749 if( !defined $geneid ) { 00750 return undef; 00751 } 00752 return $self->fetch_by_dbID($geneid); 00753 } 00754 00755 00756 00757 00758 =head2 fetch_all_by_external_name 00759 00760 Arg [1] : String $external_name 00761 The external identifier for the gene to be obtained 00762 Arg [2] : (optional) String $external_db_name 00763 The name of the external database from which the 00764 identifier originates. 00765 Example : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')} 00766 @many_genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA%')} 00767 Description: Retrieves a list of genes with an external database 00768 identifier $external_name. The genes returned are in 00769 their native coordinate system, i.e. in the coordinate 00770 system they are stored in the database in. If another 00771 coordinate system is required then the Gene::transfer or 00772 Gene::transform method can be used. 00773 SQL wildcards % and _ are supported in the $external_name 00774 Returntype : listref of Bio::EnsEMBL::Gene 00775 Exceptions : none 00776 Caller : goview, general 00777 Status : Stable 00778 00779 =cut 00780 00781 sub fetch_all_by_external_name { 00782 my ( $self, $external_name, $external_db_name ) = @_; 00783 00784 my $entryAdaptor = $self->db->get_DBEntryAdaptor(); 00785 00786 my @ids = 00787 $entryAdaptor->list_gene_ids_by_extids( $external_name, 00788 $external_db_name ); 00789 00790 my %genes_by_dbIDs = 00791 map { $_->dbID(), $_ } @{ $self->fetch_all_by_dbID_list( \@ids ) }; 00792 00793 my @result = map { $genes_by_dbIDs{$_} } @ids; 00794 00795 return \@result; 00796 } 00797 00798 =head2 fetch_all_by_GOTerm 00799 00800 Arg [1] : Bio::EnsEMBL::OntologyTerm 00801 The GO term for which genes should be fetched. 00802 00803 Example: @genes = @{ 00804 $gene_adaptor->fetch_all_by_GOTerm( 00805 $go_adaptor->fetch_by_accession('GO:0030326') ) }; 00806 00807 Description : Retrieves a list of genes that are associated with 00808 the given GO term, or with any of its descendent 00809 GO terms. The genes returned are in their native 00810 coordinate system, i.e. in the coordinate system 00811 in which they are stored in the database. If 00812 another coordinate system is required then the 00813 Gene::transfer or Gene::transform method can be 00814 used. 00815 00816 Return type : listref of Bio::EnsEMBL::Gene 00817 Exceptions : Throws of argument is not a GO term 00818 Caller : general 00819 Status : Stable 00820 00821 =cut 00822 00823 sub fetch_all_by_GOTerm { 00824 my ( $self, $term ) = @_; 00825 00826 assert_ref( $term, 'Bio::EnsEMBL::OntologyTerm' ); 00827 if ( $term->ontology() ne 'GO' ) { 00828 throw('Argument is not a GO term'); 00829 } 00830 00831 my $entryAdaptor = $self->db->get_DBEntryAdaptor(); 00832 00833 my %unique_dbIDs; 00834 foreach my $accession ( map { $_->accession() } 00835 ( $term, @{ $term->descendants() } ) ) 00836 { 00837 my @ids = 00838 $entryAdaptor->list_gene_ids_by_extids( $accession, 'GO' ); 00839 foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 } 00840 } 00841 00842 my @result = @{ 00843 $self->fetch_all_by_dbID_list( 00844 [ sort { $a <=> $b } keys(%unique_dbIDs) ] 00845 ) }; 00846 00847 return \@result; 00848 } ## end sub fetch_all_by_GOTerm 00849 00850 =head2 fetch_all_by_GOTerm_accession 00851 00852 Arg [1] : String 00853 The GO term accession for which genes should be 00854 fetched. 00855 00856 Example : 00857 00858 @genes = 00859 @{ $gene_adaptor->fetch_all_by_GOTerm_accession( 00860 'GO:0030326') }; 00861 00862 Description : Retrieves a list of genes that are associated with 00863 the given GO term, or with any of its descendent 00864 GO terms. The genes returned are in their native 00865 coordinate system, i.e. in the coordinate system 00866 in which they are stored in the database. If 00867 another coordinate system is required then the 00868 Gene::transfer or Gene::transform method can be 00869 used. 00870 00871 Return type : listref of Bio::EnsEMBL::Gene 00872 Exceptions : Throws of argument is not a GO term accession 00873 Caller : general 00874 Status : Stable 00875 00876 =cut 00877 00878 sub fetch_all_by_GOTerm_accession { 00879 my ( $self, $accession ) = @_; 00880 00881 if ( $accession !~ /^GO:/ ) { 00882 throw('Argument is not a GO term accession'); 00883 } 00884 00885 my $goAdaptor = 00886 Bio::EnsEMBL::Registry->get_adaptor( 'Multi', 'Ontology', 00887 'OntologyTerm' ); 00888 00889 my $term = $goAdaptor->fetch_by_accession($accession); 00890 00891 return $self->fetch_all_by_GOTerm($term); 00892 } 00893 00894 =head2 fetch_all_alt_alleles 00895 00896 Arg [1] : Bio::EnsEMBL::Gene $gene 00897 The gene to fetch alternative alleles for 00898 Example : my @alt_genes = @{ $gene_adaptor->fetch_all_alt_alleles($gene) }; 00899 foreach my $alt_gene (@alt_genes) { 00900 print "Alternate allele: " . $alt_gene->stable_id() . "\n"; 00901 } 00902 Description: Retrieves genes which are alternate alleles to a provided gene. 00903 Alternate alleles in Ensembl are genes which are similar and are 00904 on an alternative haplotype of the same region. There are not 00905 currently very many of these. This method will return a 00906 reference to an empty list if no alternative alleles are found. 00907 Returntype : listref of Bio::EnsEMBL::Genes 00908 Exceptions : throw if incorrect arg provided 00909 warning if gene arg does not have dbID 00910 Caller : Gene::get_all_alt_alleles 00911 Status : Stable 00912 00913 =cut 00914 00915 sub fetch_all_alt_alleles { 00916 my $self = shift; 00917 my $gene = shift; 00918 00919 if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { 00920 throw('Bio::EnsEMBL::Gene argument is required'); 00921 } 00922 00923 my $gene_id = $gene->dbID(); 00924 00925 if(!$gene_id) { 00926 warning('Cannot retrieve alternate alleles for gene without dbID'); 00927 return []; 00928 } 00929 00930 my $sth = $self->prepare("SELECT aa1.gene_id " . 00931 "FROM alt_allele aa1, alt_allele aa2 " . 00932 "WHERE aa1.alt_allele_id = aa2.alt_allele_id " . 00933 "AND aa2.gene_id = ? " . 00934 "AND aa1.gene_id <> ?"); 00935 00936 $sth->bind_param(1, $gene_id, SQL_INTEGER); 00937 $sth->bind_param(2, $gene_id, SQL_INTEGER); 00938 $sth->execute(); 00939 00940 my @alt_ids; 00941 my $row; 00942 while($row = $sth->fetchrow_arrayref()) { 00943 push @alt_ids, $row->[0]; 00944 } 00945 $sth->finish(); 00946 00947 if (@alt_ids) { 00948 return $self->fetch_all_by_dbID_list(\@alt_ids); 00949 } 00950 00951 return []; 00952 } 00953 00954 sub is_ref{ 00955 my ( $self, $gene_id) = @_; 00956 my $is_not_ref; 00957 00958 # easier to find if it is not an alt_Allele do this and then negate it. 00959 my $sth = $self->prepare("select count(1) from alt_allele where gene_id = $gene_id and is_ref = 0"); 00960 $sth->execute(); 00961 $sth->bind_columns(\$is_not_ref); 00962 $sth->fetch; 00963 00964 if(defined($is_not_ref) and $is_not_ref){ 00965 return 0; 00966 } 00967 00968 return 1; 00969 } 00970 00971 00972 =head2 store_alt_alleles 00973 00974 00975 Arg [1] : reference to list of Bio::EnsEMBL::Genes $genes 00976 Example : $gene_adaptor->store_alt_alleles([$gene1, $gene2, $gene3]); 00977 Description: This method creates a group of alternative alleles (i.e. locus) 00978 from a set of genes. The genes should be genes from alternate 00979 haplotypes which are similar. The genes must already be stored 00980 in this database. At least 2 genes must be in the list reference 00981 provided. 00982 Returntype : none 00983 Exceptions : throw on incorrect arguments 00984 throw on sql error (e.g. duplicate unique id) 00985 Caller : general 00986 Status : Stable 00987 00988 =cut 00989 00990 sub store_alt_alleles { 00991 my $self = shift; 00992 my $genes = shift; 00993 00994 if(!ref($genes) eq 'ARRAY') { 00995 throw('List reference of Bio::EnsEMBL::Gene argument expected.'); 00996 } 00997 00998 my $num_genes = scalar(@$genes); 00999 01000 if($num_genes < 2) { 01001 throw("At least 2 genes must be provided to construct alternate alleles."); 01002 } 01003 01004 return if(!@$genes); 01005 01006 # 01007 #insert the first gene seperately in order to get a unique identifier for 01008 #the set of alleles 01009 # 01010 my $gene = $genes->[0]; 01011 01012 if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { 01013 throw('List reference of Bio::EnsEMBL::Gene argument expected.'); 01014 } 01015 01016 my $gene_id = $gene->dbID(); 01017 01018 if (!$gene_id) { 01019 throw("Genes must have dbIDs in order to construct alternate alleles."); 01020 } 01021 01022 my $sth = $self->prepare("INSERT INTO alt_allele (gene_id) VALUES (?)"); 01023 $sth->bind_param(1, $gene->dbID, SQL_INTEGER); 01024 $sth->execute(); 01025 01026 my $alt_allele_id = $sth->{'mysql_insertid'}; 01027 $sth->finish(); 01028 01029 # 01030 # Insert all subsequent alt alleles using the alt_allele identifier 01031 # from the first insert 01032 # 01033 01034 $sth = $self->prepare("INSERT INTO alt_allele (alt_allele_id, gene_id) " . 01035 "VALUES (?,?)"); 01036 01037 for (my $i = 1; $i < $num_genes; $i++) { 01038 my $gene = $genes->[$i]; 01039 01040 if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { 01041 throw("List reference of Bio::EnsEMBL::Gene argument expected"); 01042 } 01043 01044 $gene_id = $gene->dbID(); 01045 01046 if (!$gene_id) { 01047 # This is an error but we have already inserted into the database 01048 # delete the already inserted entries to restore the state of the 01049 # database 01050 $sth->finish(); 01051 $sth->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?"); 01052 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER); 01053 $sth->execute(); 01054 $sth->finish(); 01055 throw('Genes must have dbIDs in order to construct alternate alleles.'); 01056 } 01057 01058 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER); 01059 $sth->bind_param(2, $gene_id, SQL_INTEGER); 01060 eval { 01061 $sth->execute(); 01062 }; 01063 01064 if ($@) { 01065 # an error occured, revert the db to the previous state 01066 $sth = $self->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?"); 01067 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER); 01068 $sth->execute(); 01069 $sth->finish(); 01070 throw("An SQL error occured inserting alternate alleles:\n$@"); 01071 } 01072 } 01073 01074 $sth->finish(); 01075 01076 return; 01077 } 01078 01079 01080 =head2 store 01081 01082 Arg [1] : Bio::EnsEMBL::Gene $gene 01083 The gene to store in the database 01084 Arg [2] : ignore_release in xrefs [default 1] set to 0 to use release info 01085 in external database references 01086 Example : $gene_adaptor->store($gene); 01087 Description: Stores a gene in the database. 01088 Returntype : the database identifier (dbID) of the newly stored gene 01089 Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene or if 01090 $gene does not have an analysis object 01091 Caller : general 01092 Status : Stable 01093 01094 =cut 01095 01096 sub store { 01097 my ($self, $gene, $ignore_release) = @_; 01098 01099 if (!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { 01100 throw("Must store a gene object, not a $gene"); 01101 } 01102 if(!defined($ignore_release)){ 01103 $ignore_release = 1; 01104 } 01105 my $db = $self->db(); 01106 01107 if ($gene->is_stored($db)) { 01108 return $gene->dbID(); 01109 } 01110 01111 # ensure coords are correct before storing 01112 $gene->recalculate_coordinates(); 01113 01114 my $analysis = $gene->analysis(); 01115 throw("Genes must have an analysis object.") if(!defined($analysis)); 01116 01117 my $analysis_id; 01118 if ($analysis->is_stored($db)) { 01119 $analysis_id = $analysis->dbID(); 01120 } else { 01121 $analysis_id = $db->get_AnalysisAdaptor->store($analysis); 01122 } 01123 01124 my $type = $gene->biotype || ""; 01125 01126 # default to is_current = 1 if this attribute is not set 01127 my $is_current = $gene->is_current; 01128 $is_current = 1 unless (defined($is_current)); 01129 01130 my $original = $gene; 01131 my $original_transcripts = $gene->get_all_Transcripts(); 01132 01133 my $seq_region_id; 01134 01135 ( $gene, $seq_region_id ) = $self->_pre_store($gene); 01136 01137 my $store_gene_sql = qq( 01138 INSERT INTO gene 01139 SET biotype = ?, 01140 analysis_id = ?, 01141 seq_region_id = ?, 01142 seq_region_start = ?, 01143 seq_region_end = ?, 01144 seq_region_strand = ?, 01145 description = ?, 01146 source = ?, 01147 status = ?, 01148 is_current = ?, 01149 canonical_transcript_id = ?, 01150 canonical_annotation = ? 01151 ); 01152 01153 if (defined($gene->stable_id)) { 01154 my $created = $self->db->dbc->from_seconds_to_date($gene->created_date()); 01155 my $modified = $self->db->dbc->from_seconds_to_date($gene->modified_date()); 01156 $store_gene_sql .= ", stable_id = ?, version = ?, created_date = " . $created . " , modified_date = " . $modified; 01157 01158 } 01159 01160 # column status is used from schema version 34 onwards (before it was 01161 # confidence) 01162 01163 my $sth = $self->prepare($store_gene_sql); 01164 $sth->bind_param( 1, $type, SQL_VARCHAR ); 01165 $sth->bind_param( 2, $analysis_id, SQL_INTEGER ); 01166 $sth->bind_param( 3, $seq_region_id, SQL_INTEGER ); 01167 $sth->bind_param( 4, $gene->start(), SQL_INTEGER ); 01168 $sth->bind_param( 5, $gene->end(), SQL_INTEGER ); 01169 $sth->bind_param( 6, $gene->strand(), SQL_TINYINT ); 01170 $sth->bind_param( 7, $gene->description(), SQL_LONGVARCHAR ); 01171 $sth->bind_param( 8, $gene->source(), SQL_VARCHAR ); 01172 $sth->bind_param( 9, $gene->status(), SQL_VARCHAR ); 01173 $sth->bind_param( 10, $is_current, SQL_TINYINT ); 01174 01175 # Canonical transcript ID will be updated later. 01176 # Set it to zero for now. 01177 $sth->bind_param( 11, 0, SQL_TINYINT ); 01178 01179 $sth->bind_param( 12, $gene->canonical_annotation(), SQL_VARCHAR ); 01180 01181 if ( defined($gene->stable_id) ) { 01182 01183 $sth->bind_param( 13, $gene->stable_id, SQL_VARCHAR ); 01184 my $version = ($gene->version()) ? $gene->version() : 1; 01185 $sth->bind_param( 14, $version, SQL_INTEGER ); 01186 } 01187 01188 $sth->execute(); 01189 $sth->finish(); 01190 01191 my $gene_dbID = $sth->{'mysql_insertid'}; 01192 01193 # store the dbentries associated with this gene 01194 my $dbEntryAdaptor = $db->get_DBEntryAdaptor(); 01195 01196 foreach my $dbe ( @{ $gene->get_all_DBEntries } ) { 01197 $dbEntryAdaptor->store( $dbe, $gene_dbID, "Gene", $ignore_release ); 01198 } 01199 01200 # We allow transcripts not to share equal exons and instead have 01201 # copies. For the database we still want sharing though, to have 01202 # easier time with stable ids. So we need to have a step to merge 01203 # exons together before store. 01204 my %exons; 01205 01206 foreach my $trans ( @{$gene->get_all_Transcripts} ) { 01207 foreach my $e ( @{$trans->get_all_Exons} ) { 01208 my $key = $e->hashkey(); 01209 if( exists $exons{ $key } ) { 01210 $trans->swap_exons( $e, $exons{$key} ); 01211 } else { 01212 $exons{$key} = $e; 01213 } 01214 } 01215 } 01216 01217 my $transcript_adaptor = $db->get_TranscriptAdaptor(); 01218 01219 my $transcripts = $gene->get_all_Transcripts(); 01220 01221 my $new_canonical_transcript_id; 01222 for ( my $i = 0; $i < @$transcripts; $i++ ) { 01223 my $new = $transcripts->[$i]; 01224 my $old = $original_transcripts->[$i]; 01225 01226 $transcript_adaptor->store( $new, $gene_dbID, $analysis_id ); 01227 01228 if ( !defined($new_canonical_transcript_id) 01229 && $new->is_canonical() ) 01230 { 01231 $new_canonical_transcript_id = $new->dbID(); 01232 } 01233 01234 # update the original transcripts since we may have made copies of 01235 # them by transforming the gene 01236 $old->dbID( $new->dbID() ); 01237 $old->adaptor( $new->adaptor() ); 01238 01239 if ( $new->translation ) { 01240 $old->translation->dbID( $new->translation()->dbID ); 01241 $old->translation->adaptor( $new->translation()->adaptor ); 01242 } 01243 } 01244 01245 if ( defined($new_canonical_transcript_id) ) { 01246 # Now the canonical transcript has been stored, so update the 01247 # canonical_transcript_id of this gene with the new dbID. 01248 my $sth = $self->prepare( 01249 q( 01250 UPDATE gene 01251 SET canonical_transcript_id = ? 01252 WHERE gene_id = ?) 01253 ); 01254 01255 $sth->bind_param( 1, $new_canonical_transcript_id, SQL_INTEGER ); 01256 $sth->bind_param( 2, $gene_dbID, SQL_INTEGER ); 01257 01258 $sth->execute(); 01259 $sth->finish(); 01260 } 01261 01262 # update gene to point to display xref if it is set 01263 if(my $display_xref = $gene->display_xref) { 01264 my $dxref_id; 01265 if($display_xref->is_stored($db)) { 01266 $dxref_id = $display_xref->dbID(); 01267 } else { 01268 $dxref_id = $dbEntryAdaptor->exists($display_xref); 01269 } 01270 01271 if(defined($dxref_id)) { 01272 my $sth = $self->prepare 01273 ("UPDATE gene SET display_xref_id = ? WHERE gene_id = ?"); 01274 $sth->bind_param(1, $dxref_id, SQL_INTEGER); 01275 $sth->bind_param(2, $gene_dbID, SQL_INTEGER); 01276 $sth->execute(); 01277 $sth->finish(); 01278 $display_xref->dbID($dxref_id); 01279 $display_xref->adaptor($dbEntryAdaptor); 01280 $display_xref->dbID($dxref_id); 01281 $display_xref->adaptor($dbEntryAdaptor); 01282 } else { 01283 warning("Display_xref ".$display_xref->dbname().":". 01284 $display_xref->display_id() . " is not stored in database.\n". 01285 "Not storing relationship to this gene."); 01286 $display_xref->dbID(undef); 01287 $display_xref->adaptor(undef); 01288 } 01289 } 01290 01291 # store gene attributes if there are any 01292 my $attr_adaptor = $db->get_AttributeAdaptor(); 01293 $attr_adaptor->store_on_Gene($gene_dbID, $gene->get_all_Attributes); 01294 01295 # store unconventional transcript associations if there are any 01296 my $utaa = $db->get_UnconventionalTranscriptAssociationAdaptor(); 01297 foreach my $uta (@{$gene->get_all_unconventional_transcript_associations()}) { 01298 $utaa->store($uta); 01299 } 01300 01301 # set the adaptor and dbID on the original passed in gene not the 01302 # transfered copy 01303 $original->adaptor($self); 01304 $original->dbID($gene_dbID); 01305 01306 return $gene_dbID; 01307 } 01308 01309 01310 =head2 remove 01311 01312 Arg [1] : Bio::EnsEMBL::Gene $gene 01313 the gene to remove from the database 01314 Example : $gene_adaptor->remove($gene); 01315 Description: Removes a gene completely from the database. All associated 01316 transcripts, exons, stable_identifiers, descriptions, etc. 01317 are removed as well. Use with caution! 01318 Returntype : none 01319 Exceptions : throw on incorrect arguments 01320 warning if gene is not stored in this database 01321 Caller : general 01322 Status : Stable 01323 01324 =cut 01325 01326 sub remove { 01327 my $self = shift; 01328 my $gene = shift; 01329 01330 if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { 01331 throw("Bio::EnsEMBL::Gene argument expected."); 01332 } 01333 01334 if ( !$gene->is_stored($self->db()) ) { 01335 warning("Cannot remove gene " . $gene->dbID() . ". Is not stored in " . 01336 "this database."); 01337 return; 01338 } 01339 01340 # remove all object xrefs associated with this gene 01341 01342 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor(); 01343 foreach my $dbe (@{$gene->get_all_DBEntries()}) { 01344 $dbe_adaptor->remove_from_object($dbe, $gene, 'Gene'); 01345 } 01346 01347 # remove all alternative allele entries associated with this gene 01348 my $sth = $self->prepare("DELETE FROM alt_allele WHERE gene_id = ?"); 01349 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); 01350 $sth->execute(); 01351 $sth->finish(); 01352 01353 # remove the attributes associated with this transcript 01354 my $attrib_adaptor = $self->db->get_AttributeAdaptor; 01355 $attrib_adaptor->remove_from_Gene($gene); 01356 01357 # remove all of the transcripts associated with this gene 01358 my $transcriptAdaptor = $self->db->get_TranscriptAdaptor(); 01359 foreach my $trans ( @{$gene->get_all_Transcripts()} ) { 01360 $transcriptAdaptor->remove($trans); 01361 } 01362 01363 # remove any unconventional transcript associations involving this gene 01364 01365 $sth = 01366 $self->prepare( "DELETE FROM unconventional_transcript_association " 01367 . "WHERE gene_id = ? " ); 01368 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); 01369 $sth->execute(); 01370 $sth->finish(); 01371 01372 # remove this gene from the database 01373 01374 $sth = $self->prepare("DELETE FROM gene WHERE gene_id = ? "); 01375 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); 01376 $sth->execute(); 01377 $sth->finish(); 01378 01379 # unset the gene identifier and adaptor thereby flagging it as unstored 01380 01381 $gene->dbID(undef); 01382 $gene->adaptor(undef); 01383 01384 return; 01385 } 01386 01387 01388 =head2 get_Interpro_by_geneid 01389 01390 Arg [1] : String $gene_stable_id 01391 The stable ID of the gene to obtain 01392 Example : @i = @{ 01393 $gene_adaptor->get_Interpro_by_geneid( 01394 $gene->stable_id() ) }; 01395 Description: Gets interpro accession numbers by gene stable id. A hack really 01396 - we should have a much more structured system than this. 01397 Returntype : listref of strings (Interpro_acc:description) 01398 Exceptions : none 01399 Caller : domainview 01400 Status : Stable 01401 01402 =cut 01403 01404 sub get_Interpro_by_geneid { 01405 my ($self, $gene_stable_id) = @_; 01406 01407 my $sql = qq( 01408 SELECT i.interpro_ac, 01409 x.description 01410 FROM transcript t, 01411 translation tl, 01412 protein_feature pf, 01413 interpro i, 01414 xref x, 01415 gene g 01416 WHERE g.stable_id = ? 01417 AND t.gene_id = g.gene_id 01418 AND t.is_current = 1 01419 AND tl.transcript_id = t.transcript_id 01420 AND tl.translation_id = pf.translation_id 01421 AND i.id = pf.hit_name 01422 AND i.interpro_ac = x.dbprimary_acc); 01423 01424 my $sth = $self->prepare($sql); 01425 01426 $sth->bind_param( 1, $gene_stable_id, SQL_VARCHAR ); 01427 01428 $sth->execute; 01429 01430 my @out; 01431 my %h; 01432 while( (my $arr = $sth->fetchrow_arrayref()) ) { 01433 if( $h{$arr->[0]} ) { next; } 01434 $h{$arr->[0]}=1; 01435 my $string = $arr->[0] .":".$arr->[1]; 01436 push(@out,$string); 01437 } 01438 01439 return \@out; 01440 } 01441 01442 01443 =head2 update 01444 01445 Arg [1] : Bio::EnsEMBL::Gene $gene 01446 The gene to update 01447 Example : $gene_adaptor->update($gene); 01448 Description: Updates the type, analysis, display_xref, status, is_current and 01449 description of a gene in the database. 01450 Returntype : None 01451 Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene 01452 Caller : general 01453 Status : Stable 01454 01455 =cut 01456 01457 sub update { 01458 my ($self, $gene) = @_; 01459 my $update = 0; 01460 01461 if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { 01462 throw("Must update a gene object, not a $gene"); 01463 } 01464 01465 my $update_gene_sql = qq( 01466 UPDATE gene 01467 SET biotype = ?, 01468 analysis_id = ?, 01469 display_xref_id = ?, 01470 status = ?, 01471 description = ?, 01472 is_current = ?, 01473 canonical_transcript_id = ?, 01474 canonical_annotation = ? 01475 WHERE gene_id = ? 01476 ); 01477 01478 my $display_xref = $gene->display_xref(); 01479 my $display_xref_id; 01480 01481 if ( $display_xref && $display_xref->dbID() ) { 01482 $display_xref_id = $display_xref->dbID(); 01483 } else { 01484 $display_xref_id = undef; 01485 } 01486 01487 my $sth = $self->prepare( $update_gene_sql ); 01488 01489 $sth->bind_param( 1, $gene->biotype(), SQL_VARCHAR ); 01490 $sth->bind_param( 2, $gene->analysis->dbID(), SQL_INTEGER ); 01491 $sth->bind_param( 3, $display_xref_id, SQL_INTEGER ); 01492 $sth->bind_param( 4, $gene->status(), SQL_VARCHAR ); 01493 $sth->bind_param( 5, $gene->description(), SQL_VARCHAR ); 01494 $sth->bind_param( 6, $gene->is_current(), SQL_TINYINT ); 01495 01496 if ( defined( $gene->canonical_transcript() ) ) { 01497 $sth->bind_param( 7, $gene->canonical_transcript()->dbID(), 01498 SQL_INTEGER ); 01499 } else { 01500 $sth->bind_param( 7, 0, SQL_INTEGER ); 01501 } 01502 01503 $sth->bind_param( 8, $gene->canonical_annotation(), SQL_VARCHAR ); 01504 $sth->bind_param( 9, $gene->dbID(), SQL_INTEGER ); 01505 01506 $sth->execute(); 01507 01508 # maybe should update stable id ??? 01509 } 01510 01511 01512 # _objs_from_sth 01513 01514 # Arg [1] : StatementHandle $sth 01515 # Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper 01516 # Arg [3] : Bio::EnsEMBL::Slice $dest_slice 01517 # Description: PROTECTED implementation of abstract superclass method. 01518 # responsible for the creation of Genes 01519 # Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system 01520 # Exceptions : none 01521 # Caller : internal 01522 # Status : Stable 01523 01524 sub _objs_from_sth { 01525 my ($self, $sth, $mapper, $dest_slice) = @_; 01526 01527 # 01528 # This code is ugly because an attempt has been made to remove as many 01529 # function calls as possible for speed purposes. Thus many caches and 01530 # a fair bit of gymnastics is used. 01531 # 01532 01533 my $sa = $self->db()->get_SliceAdaptor(); 01534 my $aa = $self->db()->get_AnalysisAdaptor(); 01535 my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor(); 01536 01537 my @genes; 01538 my %analysis_hash; 01539 my %slice_hash; 01540 my %sr_name_hash; 01541 my %sr_cs_hash; 01542 01543 my ( 01544 $gene_id, $seq_region_id, 01545 $seq_region_start, $seq_region_end, 01546 $seq_region_strand, $analysis_id, 01547 $biotype, $display_xref_id, 01548 $gene_description, $status, 01549 $source, $is_current, 01550 $canonical_transcript_id, $canonical_annotation, 01551 $stable_id, $version, 01552 $created_date, $modified_date, 01553 $xref_display_id, $xref_primary_acc, 01554 $xref_desc, $xref_version, 01555 $external_db, $external_status, 01556 $external_release, $external_db_name, 01557 $info_type, $info_text 01558 ); 01559 01560 $sth->bind_columns( 01561 \( 01562 $gene_id, $seq_region_id, 01563 $seq_region_start, $seq_region_end, 01564 $seq_region_strand, $analysis_id, 01565 $biotype, $display_xref_id, 01566 $gene_description, $status, 01567 $source, $is_current, 01568 $canonical_transcript_id, $canonical_annotation, 01569 $stable_id, $version, 01570 $created_date, $modified_date, 01571 $xref_display_id, $xref_primary_acc, 01572 $xref_desc, $xref_version, 01573 $external_db, $external_status, 01574 $external_release, $external_db_name, 01575 $info_type, $info_text 01576 ) ); 01577 01578 my $asm_cs; 01579 my $cmp_cs; 01580 my $asm_cs_vers; 01581 my $asm_cs_name; 01582 my $cmp_cs_vers; 01583 my $cmp_cs_name; 01584 01585 if($mapper) { 01586 $asm_cs = $mapper->assembled_CoordSystem(); 01587 $cmp_cs = $mapper->component_CoordSystem(); 01588 $asm_cs_name = $asm_cs->name(); 01589 $asm_cs_vers = $asm_cs->version(); 01590 $cmp_cs_name = $cmp_cs->name(); 01591 $cmp_cs_vers = $cmp_cs->version(); 01592 } 01593 01594 my $dest_slice_start; 01595 my $dest_slice_end; 01596 my $dest_slice_strand; 01597 my $dest_slice_length; 01598 my $dest_slice_sr_name; 01599 my $dest_slice_sr_id; 01600 01601 if($dest_slice) { 01602 $dest_slice_start = $dest_slice->start(); 01603 $dest_slice_end = $dest_slice->end(); 01604 $dest_slice_strand = $dest_slice->strand(); 01605 $dest_slice_length = $dest_slice->length(); 01606 $dest_slice_sr_name = $dest_slice->seq_region_name(); 01607 $dest_slice_sr_id = $dest_slice->get_seq_region_id(); 01608 } 01609 01610 FEATURE: while($sth->fetch()) { 01611 #get the analysis object 01612 my $analysis = $analysis_hash{$analysis_id} ||= 01613 $aa->fetch_by_dbID($analysis_id); 01614 01615 #need to get the internal_seq_region, if present 01616 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); 01617 my $slice = $slice_hash{"ID:".$seq_region_id}; 01618 01619 if(!$slice) { 01620 $slice = $sa->fetch_by_seq_region_id($seq_region_id); 01621 $slice_hash{"ID:".$seq_region_id} = $slice; 01622 $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); 01623 $sr_cs_hash{$seq_region_id} = $slice->coord_system(); 01624 } 01625 01626 my $sr_name = $sr_name_hash{$seq_region_id}; 01627 my $sr_cs = $sr_cs_hash{$seq_region_id}; 01628 01629 # 01630 # remap the feature coordinates to another coord system 01631 # if a mapper was provided 01632 # 01633 if($mapper) { 01634 01635 ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) = 01636 $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, 01637 $seq_region_strand, $sr_cs); 01638 01639 #skip features that map to gaps or coord system boundaries 01640 next FEATURE if(!defined($seq_region_id)); 01641 01642 #get a slice in the coord system we just mapped to 01643 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { 01644 $slice = $slice_hash{"ID:".$seq_region_id} ||= 01645 $sa->fetch_by_seq_region_id($seq_region_id); 01646 # } else { 01647 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||= 01648 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef, 01649 # $asm_cs_vers); 01650 # } 01651 } 01652 01653 # 01654 # If a destination slice was provided convert the coords. 01655 # 01656 if ( defined($dest_slice) ) { 01657 if ( $dest_slice_strand == 1 ) { 01658 # Positive strand. 01659 01660 $seq_region_start = $seq_region_start - $dest_slice_start + 1; 01661 $seq_region_end = $seq_region_end - $dest_slice_start + 1; 01662 01663 if ( $dest_slice->is_circular() ) { 01664 # Handle cicular chromosomes. 01665 01666 if ( $seq_region_start > $seq_region_end ) { 01667 # Looking at a feature overlapping the chromsome origin. 01668 01669 if ( $seq_region_end > $dest_slice_start ) { 01670 # Looking at the region in the beginning of the 01671 # chromosome. 01672 $seq_region_start -= $dest_slice->seq_region_length(); 01673 } 01674 01675 if ( $seq_region_end < 0 ) { 01676 $seq_region_end += $dest_slice->seq_region_length(); 01677 } 01678 01679 } else { 01680 01681 if ( $dest_slice_start > $dest_slice_end 01682 && $seq_region_end < 0 ) 01683 { 01684 # Looking at the region overlapping the chromosome 01685 # origin and a feature which is at the beginning of the 01686 # chromosome. 01687 $seq_region_start += $dest_slice->seq_region_length(); 01688 $seq_region_end += $dest_slice->seq_region_length(); 01689 } 01690 } 01691 01692 } ## end if ( $dest_slice->is_circular...) 01693 01694 } else { 01695 # Negative strand. 01696 01697 if ( $dest_slice->is_circular() 01698 && $seq_region_start > $seq_region_end ) 01699 { 01700 # Handle cicular chromosomes. 01701 01702 if ( $seq_region_end > $dest_slice_start ) { 01703 # Looking at the region in the beginning of the 01704 # chromosome. 01705 $seq_region_start = $dest_slice_end - $seq_region_end + 1; 01706 $seq_region_end = 01707 $seq_region_end - 01708 $dest_slice->seq_region_length - 01709 $dest_slice_start + 1; 01710 } else { 01711 my $tmp_seq_region_start = $seq_region_start; 01712 $seq_region_start = 01713 $dest_slice_end - 01714 $seq_region_end - 01715 $dest_slice->seq_region_length + 1; 01716 $seq_region_end = 01717 $dest_slice_end - $tmp_seq_region_start + 1; 01718 } 01719 01720 } else { 01721 # Non-circular chromosome. 01722 01723 my $tmp_seq_region_start = $seq_region_start; 01724 $seq_region_start = $dest_slice_end - $seq_region_end + 1; 01725 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; 01726 } 01727 01728 $seq_region_strand = -$seq_region_strand; 01729 01730 } ## end else [ if ( $dest_slice_strand...)] 01731 01732 # Throw away features off the end of the requested slice or on 01733 # different seq_region. 01734 01735 if ( $seq_region_end < 1 01736 || $seq_region_start > $dest_slice_length 01737 || ( $dest_slice_sr_id ne $seq_region_id ) ) 01738 { 01739 next FEATURE; 01740 } 01741 01742 $slice = $dest_slice; 01743 } ## end if ( defined($dest_slice...)) 01744 01745 my $display_xref; 01746 01747 if ($display_xref_id) { 01748 $display_xref = Bio::EnsEMBL::DBEntry->new_fast( { 01749 'dbID' => $display_xref_id, 01750 'adaptor' => $dbEntryAdaptor, 01751 'display_id' => $xref_display_id, 01752 'primary_id' => $xref_primary_acc, 01753 'version' => $xref_version, 01754 'description' => $xref_desc, 01755 'release' => $external_release, 01756 'dbname' => $external_db, 01757 'db_display_name' => $external_db_name, 01758 'info_type' => $info_type, 01759 'info_text' => $info_text 01760 } ); 01761 $display_xref->status($external_status); 01762 } 01763 01764 # Finally, create the new Gene. 01765 push( 01766 @genes, 01767 $self->_create_feature_fast( 01768 'Bio::EnsEMBL::Gene', 01769 { 01770 'analysis' => $analysis, 01771 'biotype' => $biotype, 01772 'start' => $seq_region_start, 01773 'end' => $seq_region_end, 01774 'strand' => $seq_region_strand, 01775 'adaptor' => $self, 01776 'slice' => $slice, 01777 'dbID' => $gene_id, 01778 'stable_id' => $stable_id, 01779 'version' => $version, 01780 'created_date' => $created_date || undef, 01781 'modified_date' => $modified_date || undef, 01782 'description' => $gene_description, 01783 'external_name' => undef, # will use display_id 01784 # from display_xref 01785 'external_db' => $external_db, 01786 'external_status' => $external_status, 01787 'display_xref' => $display_xref, 01788 'status' => $status, 01789 'source' => $source, 01790 'is_current' => $is_current, 01791 'canonical_transcript_id' => $canonical_transcript_id, 01792 'canonical_annotation' => $canonical_annotation 01793 } ) ); 01794 01795 } 01796 01797 return \@genes; 01798 } 01799 01800 01801 =head2 cache_gene_seq_mappings 01802 01803 Example : $gene_adaptor->cache_gene_seq_mappings(); 01804 Description: caches all the assembly mappings needed for genes 01805 Returntype : None 01806 Exceptions : None 01807 Caller : general 01808 Status : At Risk 01809 : New experimental code 01810 01811 =cut 01812 01813 sub cache_gene_seq_mappings { 01814 my ($self) = @_; 01815 01816 # get the sequence level to map too 01817 01818 my $sql = 01819 'SELECT name ' 01820 . 'FROM coord_system ' 01821 . 'WHERE attrib like "%%sequence_level%%"' 01822 . 'AND species_id = ?'; 01823 01824 my $sth = $self->prepare($sql); 01825 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); 01826 $sth->execute(); 01827 01828 my $sequence_level = $sth->fetchrow_array(); 01829 01830 $sth->finish(); 01831 01832 my $csa = $self->db->get_CoordSystemAdaptor(); 01833 my $ama = $self->db->get_AssemblyMapperAdaptor(); 01834 01835 my $cs1 = $csa->fetch_by_name($sequence_level); 01836 01837 # get level to map to two 01838 01839 my $mcc = $self->db->get_MetaCoordContainerAdaptor(); 01840 my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene'); 01841 01842 foreach my $cs2 (@$csnew) { 01843 my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 ); 01844 $am->register_all(); 01845 } 01846 01847 } ## end sub cache_gene_seq_mappings 01848 01849 01850 =head2 fetch_all_by_exon_supporting_evidence 01851 01852 Arg [1] : String $hit_name 01853 Name of supporting feature 01854 Arg [2] : String $feature_type 01855 one of "dna_align_feature" or "protein_align_feature" 01856 Arg [3] : (optional) Bio::Ensembl::Analysis 01857 Example : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence( 01858 'XYZ', 'dna_align_feature'); 01859 Description: Gets all the genes with transcripts with exons which have a 01860 specified hit on a particular type of feature. Optionally filter 01861 by analysis. 01862 Returntype : Listref of Bio::EnsEMBL::Gene 01863 Exceptions : If feature_type is not of correct type. 01864 Caller : general 01865 Status : Stable 01866 01867 =cut 01868 01869 sub fetch_all_by_exon_supporting_evidence { 01870 my ($self, $hit_name, $feature_type, $analysis) = @_; 01871 01872 if ($feature_type !~ /(dna)|(protein)_align_feature/) { 01873 throw("feature type must be dna_align_feature or protein_align_feature"); 01874 } 01875 01876 my $anal_from = ", analysis a " if ($analysis); 01877 my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); 01878 01879 my $sql = qq( 01880 SELECT DISTINCT(g.gene_id) 01881 FROM gene g, 01882 transcript t, 01883 exon_transcript et, 01884 supporting_feature sf, 01885 $feature_type f 01886 $anal_from 01887 WHERE g.gene_id = t.gene_id 01888 AND g.is_current = 1 01889 AND t.transcript_id = et.transcript_id 01890 AND et.exon_id = sf.exon_id 01891 AND sf.feature_id = f.${feature_type}_id 01892 AND sf.feature_type = ? 01893 AND f.hit_name=? 01894 $anal_where 01895 ); 01896 01897 my $sth = $self->prepare($sql); 01898 01899 $sth->bind_param(1, $feature_type, SQL_VARCHAR); 01900 $sth->bind_param(2, $hit_name, SQL_VARCHAR); 01901 $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); 01902 01903 $sth->execute(); 01904 01905 my @genes; 01906 01907 while ( my $id = $sth->fetchrow_array ) { 01908 my $gene = $self->fetch_by_dbID($id); 01909 push(@genes, $gene) if $gene; 01910 } 01911 01912 return \@genes; 01913 } 01914 01915 01916 =head2 fetch_all_by_transcript_supporting_evidence 01917 01918 Arg [1] : String $hit_name 01919 Name of supporting feature 01920 Arg [2] : String $feature_type 01921 one of "dna_align_feature" or "protein_align_feature" 01922 Arg [3] : (optional) Bio::Ensembl::Analysis 01923 Example : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature'); 01924 Description: Gets all the genes with transcripts with evidence for a 01925 specified hit on a particular type of feature. Optionally filter 01926 by analysis. 01927 Returntype : Listref of Bio::EnsEMBL::Gene. 01928 Exceptions : If feature_type is not of correct type. 01929 Caller : general 01930 Status : Stable 01931 01932 =cut 01933 01934 sub fetch_all_by_transcript_supporting_evidence { 01935 my ($self, $hit_name, $feature_type, $analysis) = @_; 01936 01937 if($feature_type !~ /(dna)|(protein)_align_feature/) { 01938 throw("feature type must be dna_align_feature or protein_align_feature"); 01939 } 01940 01941 my $anal_from = ", analysis a " if ($analysis); 01942 my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); 01943 01944 my $sql = qq( 01945 SELECT DISTINCT(g.gene_id) 01946 FROM gene g, 01947 transcript t, 01948 transcript_supporting_feature sf, 01949 $feature_type f 01950 $anal_from 01951 WHERE g.gene_id = t.gene_id 01952 AND g.is_current = 1 01953 AND t.transcript_id = sf.transcript_id 01954 AND sf.feature_id = f.${feature_type}_id 01955 AND sf.feature_type = ? 01956 AND f.hit_name=? 01957 $anal_where 01958 ); 01959 01960 my $sth = $self->prepare($sql); 01961 01962 $sth->bind_param(1, $feature_type, SQL_VARCHAR); 01963 $sth->bind_param(2, $hit_name, SQL_VARCHAR); 01964 $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); 01965 01966 $sth->execute(); 01967 01968 my @genes; 01969 01970 while( my $id = $sth->fetchrow_array ) { 01971 my $gene = $self->fetch_by_dbID($id); 01972 push(@genes, $gene) if $gene; 01973 } 01974 01975 return \@genes; 01976 } 01977 01978 =head2 fetch_nearest_Gene_by_Feature 01979 01980 Arg [1] : Feature object 01981 Example : $genes = $gene_adaptor->fetch_nearest_Gene_by_Feature($feat); 01982 Description: Gets the nearest gene to the feature 01983 Returntype : Listref of Bio::EnsEMBL::Gene, EMPTY list if no nearest 01984 Caller : general 01985 Status : UnStable 01986 01987 =cut 01988 01989 sub fetch_nearest_Gene_by_Feature{ 01990 my $self = shift; 01991 my $feat = shift; 01992 01993 my $stranded = shift; 01994 my $stream = shift; # 1 up stream -1 downstream 01995 my @genes; 01996 01997 01998 my $strand = $feat->strand; 01999 if(defined($stream) and !$strand){ 02000 warn("stream specified but feature has no strand so +ve strand will be used"); 02001 $strand = 1; 02002 } 02003 my $min_dist = 999; 02004 my $gene_id = 0; 02005 02006 my $overlapping = $feat->get_overlapping_Genes(); 02007 02008 return $overlapping if(defined(@{$overlapping}[0])); 02009 02010 my $seq_region_id = $feat->slice->adaptor->get_seq_region_id($feat->slice); 02011 my $start = ($feat->start + $feat->slice->start) -1; 02012 my $end = ($feat->end + $feat->slice->start) -1; 02013 02014 02015 my @gene_ids; 02016 if(!defined($stream) or $stream == 0){ 02017 02018 my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where "; 02019 if($stranded){ 02020 $sql1 .= "g.seq_region_strand = ".$strand." and "; 02021 } 02022 $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10"; 02023 02024 # 02025 # MAYBE set the result of prepare to be static in case lots of calls. 02026 # 02027 my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1"; 02028 $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql"; 02029 $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns"; 02030 02031 my $last_dist = 99999999999999999; 02032 while($sql1_sth->fetch()){ 02033 if($min_dist <= $last_dist){ 02034 push @gene_ids, $gene_id; 02035 $last_dist = $min_dist; 02036 } 02037 } 02038 $sql1_sth->finish(); 02039 02040 02041 02042 my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where "; 02043 if($stranded){ 02044 $sql2 .= "g.seq_region_strand = ".$feat->strand." and "; 02045 } 02046 $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10"; 02047 02048 my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2"; 02049 02050 my ($tmp_min_dist, $tmp_gene_id); 02051 $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql"; 02052 $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns"; 02053 my $first =1; 02054 while($sql2_sth->fetch()){ 02055 if( $tmp_min_dist <= $last_dist){ 02056 if($first){ 02057 $first = 0; 02058 if($tmp_min_dist < $last_dist){ 02059 @gene_ids = (); #reset 02060 } 02061 } 02062 push @gene_ids, $tmp_gene_id; 02063 $last_dist = $tmp_min_dist; 02064 } 02065 } 02066 $sql2_sth->finish(); 02067 02068 02069 } 02070 elsif(($stream*$strand) == 1){ 02071 my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where "; 02072 if($stranded){ 02073 $sql1 .= "g.seq_region_strand = ".$strand." and "; 02074 } 02075 $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10"; 02076 02077 # 02078 # MAYBE set the result of prepare to be static in case lots of calls. 02079 # 02080 my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1"; 02081 $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql"; 02082 $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns"; 02083 02084 my $last_dist; 02085 my $first = 1; 02086 while($sql1_sth->fetch()){ 02087 if($first){ 02088 $first = 0; 02089 } 02090 else{ 02091 next if ($min_dist > $last_dist); 02092 } 02093 push @gene_ids, $gene_id; 02094 $last_dist = $min_dist; 02095 } 02096 $sql1_sth->finish(); 02097 } 02098 elsif(($stream * $strand) == -1){ 02099 02100 my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where "; 02101 if($stranded){ 02102 $sql2 .= "g.seq_region_strand = ".$feat->strand." and "; 02103 } 02104 $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10"; 02105 02106 my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2"; 02107 02108 my ($tmp_min_dist, $tmp_gene_id); 02109 $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql"; 02110 $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns"; 02111 my $first =1; 02112 my $last_dist; 02113 while($sql2_sth->fetch()){ 02114 if($first){ 02115 $first = 0; 02116 } 02117 else{ 02118 next if ($tmp_min_dist > $last_dist); 02119 } 02120 push @gene_ids, $tmp_gene_id; 02121 $last_dist = $tmp_min_dist; 02122 } 02123 $sql2_sth->finish(); 02124 } 02125 else{ 02126 die "Invalid stream or strand must be -1, 0 or 1\n"; 02127 } 02128 02129 02130 02131 foreach my $gene_id (@gene_ids){ 02132 push @genes, $self->fetch_by_dbID($gene_id); 02133 } 02134 return \@genes; 02135 02136 } 02137 02138 ########################## 02139 # # 02140 # DEPRECATED METHODS # 02141 # # 02142 ########################## 02143 02144 02145 =head2 fetch_by_maximum_DBLink 02146 02147 Description: DEPRECATED - use fetch_all_by_external_name instead 02148 02149 =cut 02150 02151 sub fetch_by_maximum_DBLink { 02152 my ($self, $external_id) = @_; 02153 02154 deprecate( "use fetch_all_by_external_name instead" ); 02155 02156 my $genes=$self->fetch_all_by_external_name($external_id); 02157 02158 my $biggest; 02159 my $max = 0; 02160 my $size = scalar(@$genes); 02161 if ($size > 0) { 02162 foreach my $gene (@$genes) { 02163 my $size = scalar(@{$gene->get_all_Exons}); 02164 if ($size > $max) { 02165 $biggest = $gene; 02166 $max = $size; 02167 } 02168 } 02169 return $biggest; 02170 } 02171 return; 02172 } 02173 02174 02175 =head2 get_display_xref 02176 02177 Description: DEPRECATED use $gene->display_xref 02178 02179 =cut 02180 02181 sub get_display_xref { 02182 my ($self, $gene) = @_; 02183 02184 deprecate( "display xref should retrieved from Gene object directly" ); 02185 02186 if ( !defined $gene ) { 02187 throw("Must call with a Gene object"); 02188 } 02189 02190 my $sth = $self->prepare(qq( 02191 SELECT e.db_name, 02192 x.display_label, 02193 x.xref_id 02194 FROM gene g, 02195 xref x, 02196 external_db e 02197 WHERE g.gene_id = ? 02198 AND g.display_xref_id = x.xref_id 02199 AND x.external_db_id = e.external_db_id 02200 )); 02201 02202 $sth->bind_param(1, $gene->dbID, SQL_INTEGER); 02203 $sth->execute(); 02204 02205 my ($db_name, $display_label, $xref_id) = $sth->fetchrow_array(); 02206 if ( !defined $xref_id ) { 02207 return undef; 02208 } 02209 02210 my $db_entry = Bio::EnsEMBL::DBEntry->new( 02211 -dbid => $xref_id, 02212 -adaptor => $self->db->get_DBEntryAdaptor(), 02213 -dbname => $db_name, 02214 -display_id => $display_label 02215 ); 02216 02217 return $db_entry; 02218 } 02219 02220 02221 =head2 get_description 02222 02223 Description: DEPRECATED, use gene->get_description 02224 02225 =cut 02226 02227 sub get_description { 02228 my ($self, $dbID) = @_; 02229 02230 deprecate( "Gene description should be loaded on gene retrieval. Use gene->get_description()" ); 02231 02232 if ( !defined $dbID ) { 02233 throw("must call with dbID"); 02234 } 02235 02236 my $sth = $self->prepare("SELECT description 02237 FROM gene_description 02238 WHERE gene_id = ?"); 02239 02240 $sth->bind_param(1, $dbID, SQL_INTEGER); 02241 $sth->execute(); 02242 02243 my @array = $sth->fetchrow_array(); 02244 return $array[0]; 02245 } 02246 02247 02248 =head2 fetch_by_Peptide_id 02249 02250 Description: DEPRECATED, use fetch_by_translation_stable_id() 02251 02252 =cut 02253 02254 sub fetch_by_Peptide_id { 02255 my ( $self, $translation_stable_id) = @_; 02256 02257 deprecate( "Please use better named fetch_by_translation_stable_id \n". 02258 caller(2) ); 02259 02260 $self->fetch_by_translation_stable_id($translation_stable_id); 02261 } 02262 02263 02264 =head2 get_stable_entry_info 02265 02266 Description: DEPRECATED use $gene->stable_id instead 02267 02268 =cut 02269 02270 sub get_stable_entry_info { 02271 my ($self,$gene) = @_; 02272 02273 deprecated("stable id info is loaded on default, no lazy loading necessary"); 02274 02275 if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { 02276 throw("Needs a gene object, not a $gene"); 02277 } 02278 02279 my $created_date = $self->db->dbc->from_date_to_seconds("created_date"); 02280 my $modified_date = $self->db->dbc->from_date_to_seconds("modified_date"); 02281 02282 my $sth = 02283 $self->prepare( "SELECT stable_id, " 02284 . $created_date . "," 02285 . $modified_date 02286 . ", version FROM gene WHERE gene_id = ?" ); 02287 02288 $sth->bind_param(1, $gene->dbID, SQL_INTEGER); 02289 $sth->execute(); 02290 02291 my @array = $sth->fetchrow_array(); 02292 $gene->{'stable_id'} = $array[0]; 02293 $gene->{'created'} = $array[1]; 02294 $gene->{'modified'} = $array[2]; 02295 $gene->{'version'} = $array[3]; 02296 02297 return 1; 02298 } 02299 02300 02301 =head2 fetch_all_by_DBEntry 02302 02303 Description: DEPRECATED - Use fetch_all_by_external_name instead 02304 02305 =cut 02306 02307 sub fetch_all_by_DBEntry { 02308 my $self = shift; 02309 02310 deprecate('Use fetch_all_by_external_name instead.'); 02311 02312 return $self->fetch_all_by_external_name(@_); 02313 } 02314 02315 02316 1; 02317 02318