Archive Ensembl Home
SimpleFeatureAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor
00024 
00025 =head1 SYNOPSIS
00026 
00027   my $simple_feature_adaptor =
00028     $database_adaptor->get_SimpleFeatureAdaptor();
00029 
00030   @simple_features =
00031     @{ $simple_feature_adaptor->fetch_all_by_Slice($slice) };
00032 
00033 =head1 DESCRIPTION
00034 
00035 Simple Feature Adaptor - database access for simple features
00036 
00037 =head1 METHODS
00038 
00039 =cut
00040 
00041 package Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor;
00042 use vars qw(@ISA);
00043 use strict;
00044 
00045 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
00046 use Bio::EnsEMBL::SimpleFeature;
00047 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00048 
00049 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
00050 
00051 
=head2 store

  Arg [1]    : list of Bio::EnsEMBL::SimpleFeatures @sf
               the simple features to store in the database
  Example    : $simple_feature_adaptor->store(@simple_feats);
  Description: Inserts each of the given simple features into the
               simple_feature table.  A feature that is_stored() in this
               database already is skipped with a warning.  An attached
               analysis that is not stored yet is stored first.  After the
               insert, the feature object passed in by the caller is given
               its new dbID and this adaptor.
  Returntype : none
  Exceptions : thrown if the list is empty, if any element is not a
               Bio::EnsEMBL::SimpleFeature, or if a feature has no
               analysis attached.  Anything thrown by _pre_store()
               propagates as well (presumably this is where a missing
               slice is detected - confirm against the superclass).
  Caller     : general
  Status     : Stable

=cut

sub store {
  my $self = shift;
  my @sf   = @_;

  throw("Must call store with list of SimpleFeatures") unless @sf;

  my $insert_sql =
      "INSERT INTO simple_feature (seq_region_id, seq_region_start, "
    . "seq_region_end, seq_region_strand, "
    . "display_label, analysis_id, score) "
    . "VALUES (?,?,?,?,?,?,?)";
  my $sth = $self->prepare($insert_sql);

  my $db               = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  foreach my $feature (@sf) {

    # Only blessed SimpleFeature objects (or subclasses) are accepted.
    unless ( ref($feature) && $feature->isa("Bio::EnsEMBL::SimpleFeature") ) {
      throw("SimpleFeature must be an Ensembl SimpleFeature, " .
            "not a [".ref($feature)."]");
    }

    # Nothing to do for features that already live in this database.
    if ( $feature->is_stored($db) ) {
      warning("SimpleFeature [".$feature->dbID."] is already stored" .
              " in this database.");
      next;
    }

    unless ( defined( $feature->analysis ) ) {
      throw("An analysis must be attached to the features to be stored.");
    }

    # The row references the analysis by dbID, so it has to exist first.
    unless ( $feature->analysis->is_stored($db) ) {
      $analysis_adaptor->store( $feature->analysis() );
    }

    # _pre_store() hands back the feature to write (possibly a different
    # object from the one passed in) plus the seq_region_id to use; the
    # caller's own object is the one updated after the insert.
    my ( $to_store, $seq_region_id ) = $self->_pre_store($feature);

    # One [ value, SQL type ] pair per placeholder, in placeholder order.
    my @bindings = (
      [ $seq_region_id,            SQL_INTEGER ],
      [ $to_store->start,          SQL_INTEGER ],
      [ $to_store->end,            SQL_INTEGER ],
      [ $to_store->strand,         SQL_TINYINT ],
      [ $to_store->display_label,  SQL_VARCHAR ],
      [ $to_store->analysis->dbID, SQL_INTEGER ],
      [ $to_store->score,          SQL_DOUBLE ],
    );

    my $position = 0;
    foreach my $binding (@bindings) {
      $sth->bind_param( ++$position, @{$binding} );
    }

    $sth->execute();

    $feature->dbID( $sth->{'mysql_insertid'} );
    $feature->adaptor($self);
  }
}
00123 
00124 
=head2 _tables

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of superclass abstract method.
               Names the table this adaptor queries together with the
               alias used for it: simple_feature, aliased as sf.
  Returntype : list of listrefs of strings
               (a single [ table name, alias ] pair)
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _tables {
  my ($self) = @_;

  # The alias 'sf' is the prefix used by the column names in _columns().
  my $table_and_alias = [ 'simple_feature', 'sf' ];

  return $table_and_alias;
}
00143 
00144 
=head2 _columns

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of superclass abstract method.
               Lists the simple_feature columns to select, each prefixed
               with the table alias sf.  _objs_from_sth() binds its result
               variables in exactly this order.
  Returntype : list of strings
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _columns {
  my ($self) = @_;

  return (
    'sf.simple_feature_id',
    'sf.seq_region_id',
    'sf.seq_region_start',
    'sf.seq_region_end',
    'sf.seq_region_strand',
    'sf.display_label',
    'sf.analysis_id',
    'sf.score',
  );
}
00165 
00166 
=head2 _objs_from_sth

  Arg [1]    : statement handle $sth
               an executed DBI statement handle whose result columns are
               those listed by _columns(), in that order
  Arg [2]    : (optional) mapper $mapper
               when given, every feature is remapped with
               $mapper->fastmap(); features for which fastmap() returns no
               seq_region id are skipped (presumably a
               Bio::EnsEMBL::AssemblyMapper - confirm against callers)
  Arg [3]    : (optional) slice $dest_slice
               when given, feature coordinates are made relative to this
               slice, and features that fall outside it or lie on another
               seq_region are skipped
  Example    : none
  Description: PROTECTED implementation of superclass abstract method.
               creates SimpleFeatures from an executed DBI statement handle.
  Returntype : list reference to Bio::EnsEMBL::SimpleFeature objects
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _objs_from_sth {
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code avoids function calls inside the row loop where it can, for
  # speed: adaptors and destination slice properties are looked up once,
  # and analyses, slices, seq_region names and coord systems are cached
  # per id.
  #

  my $db = $self->db();
  my $sa = $db->get_SliceAdaptor();
  my $aa = $db->get_AnalysisAdaptor();

  my @features;
  my %analysis_hash;    # analysis_id      => analysis object
  my %slice_hash;       # "ID:".seq_region => slice object
  my %sr_name_hash;     # seq_region_id    => seq_region name
  my %sr_cs_hash;       # seq_region_id    => coord system

  my($simple_feature_id,$seq_region_id, $seq_region_start, $seq_region_end,
     $seq_region_strand, $display_label, $analysis_id, $score);

  # Order must match the column order returned by _columns().
  $sth->bind_columns(\$simple_feature_id,\$seq_region_id, \$seq_region_start,
                     \$seq_region_end, \$seq_region_strand, \$display_label,
                     \$analysis_id, \$score);

  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_seq_region_id;
  if($dest_slice) {
    $dest_slice_start         = $dest_slice->start();
    $dest_slice_end           = $dest_slice->end();
    $dest_slice_strand        = $dest_slice->strand();
    $dest_slice_length        = $dest_slice->length();
    $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
  }

  FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);

    #need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);

    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};

    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {
      my $sr_name = $sr_name_hash{$seq_region_id};
      my $sr_cs   = $sr_cs_hash{$seq_region_id};

      ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
        $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                         $seq_region_strand, $sr_cs);

      #skip features that map to gaps or coord system boundaries
      next FEATURE if(!defined($seq_region_id));

      #get a slice for the seq_region we just mapped to
      $slice = $slice_hash{"ID:".$seq_region_id} ||=
        $sa->fetch_by_seq_region_id($seq_region_id);
    }

    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is forward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          # reverse strand slice: coordinates are counted back from the
          # slice end, so start and end swap roles and the strand flips
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }
      }

      #throw away features off the end of the requested slice
      #or on a different seq_region
      if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
         $dest_slice_seq_region_id != $seq_region_id) {
        next FEATURE;
      }
      $slice = $dest_slice;
    }

    push( @features,
          $self->_create_feature_fast(
                                    'Bio::EnsEMBL::SimpleFeature', {
                                      'start'    => $seq_region_start,
                                      'end'      => $seq_region_end,
                                      'strand'   => $seq_region_strand,
                                      'slice'    => $slice,
                                      'analysis' => $analysis,
                                      'adaptor'  => $self,
                                      'dbID'     => $simple_feature_id,
                                      'display_label' => $display_label,
                                      'score'         => $score
                                    } ) );
  }

  return \@features;
}
00324 
00325 
=head2 list_dbIDs

  Arg [1]    : (optional) int $ordered
               not 0 for the ids to be sorted by the seq_region.
  Example    : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()};
  Description: Gets the internal ids for all simple features in the
               current db by delegating to _list_dbIDs() for the
               simple_feature table.
  Returntype : whatever _list_dbIDs() returns; the example above treats it
               as a reference to a list of ints
  Exceptions : none
  Caller     : ?
  Status     : Stable

=cut

sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  # table name, no extra argument in the second position, ordering flag
  my @lookup_args = ( 'simple_feature', undef, $ordered );

  return $self->_list_dbIDs(@lookup_args);
}
00344 
00345 1;