Archive Ensembl HomeArchive Ensembl Home
MiscFeatureAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::MiscFeatureAdaptor
00024 
00025 =head1 SYNOPSIS
00026 
00027   $mfa = $database_adaptor->get_MiscFeatureAdaptor();
00028 
00029   # retrieve a misc feature by its dbID
00030   my $misc_feat = $mfa->fetch_by_dbID(1234);
00031 
00032   # retrieve all misc features in a given region
00033   my @misc_feats = @{ $mfa->fetch_all_by_Slice($slice) };
00034 
00035   # retrieve all misc features in a given region with a given set code
00036   my @misc_clones =
00037     @{ $mfa->fetch_all_by_Slice_and_set_code( $slice, 'cloneset' ) };
00038 
00039   # store some misc features in the database
00040   $mfa->store(@misc_features);
00041 
00042 =head1 DESCRIPTION
00043 
00044 This is an adaptor for the retrieval and storage of MiscFeatures.
00045 Misc Features are extremely generic features that can be added with
00046 minimal effort to the database.  Currently misc features are used to
00047 describe the locations of clone sets and tiling path information,
00048 but arbitrary features can be stored.  Misc features are grouped
00049 into sets and can be fetched according to their grouping using the
00050 fetch_all_by_Slice_and_set_code and fetch_all_by_set_code methods.
00051 MiscFeatures may belong to more than one set.
00052 
00053 =head1 METHODS
00054 
00055 =cut
00056 
00057 package Bio::EnsEMBL::DBSQL::MiscFeatureAdaptor;
00058 
00059 use strict;
00060 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
00061 use Bio::EnsEMBL::MiscFeature;
00062 use Bio::EnsEMBL::Attribute;
00063 use Bio::EnsEMBL::MiscSet;
00064 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00065 
00066 use vars qw(@ISA);
00067 
00068 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
00069 
00070 
00071 
00072 =head2 fetch_all_by_Slice_and_set_code
00073 
00074   Arg [1]    : Bio::EnsEMBL::Slice $slice
00075                A slice representing the region to fetch from
00076   Arg [2...] : string $set_code
00077                The code of the set to retrieve features from
00078   Example    : @feats = @{$mfa->fetch_all_by_Slice_and_set_code($slice, 'cloneset')};
00079   Description: Retrieves a set of MiscFeatures which have a particular set code
00080                and which lie in a particular region.  All features with the
00081                provided set code and which overlap the given slice are returned.
00082   Returntype : listref of Bio::EnsEMBL::MiscFeatures
00083   Exceptions : throw if set_code is not provided
00084                warning if no set for provided set code exists
00085   Caller     : general
00086   Status     : Stable
00087 
00088 =cut
00089 
# Resolves each set code to a misc set, then delegates to
# fetch_all_by_Slice_constraint with a constraint on mfms.misc_set_id.
#
#   Arg [1]    : slice, passed unchanged to fetch_all_by_Slice_constraint
#   Arg [2...] : one or more misc set codes
#   Returns    : the result of fetch_all_by_Slice_constraint, or a reference
#                to an empty list when no code names an existing misc set
#   Exceptions : throws if no set code is given; warns for every code that
#                has no misc set; re-raises any error from the fetch
sub fetch_all_by_Slice_and_set_code {
  my $self  = shift;
  my $slice = shift;

  throw('Set code argument is required.') unless @_;

  my $msa     = $self->db->get_MiscSetAdaptor();
  my @sets    = ();
  my $max_len = 0;
  foreach my $set_code (@_) {
    my $set = $msa->fetch_by_code($set_code);
    if ($set) {
      # Remember the longest feature over all of the requested sets.
      $max_len = $set->longest_feature if $set->longest_feature > $max_len;
      push @sets, $set->dbID;
    } else {
      warning("No misc_set with code [$set_code] exists");
    }
  }

  # None of the codes named a known set, so there is nothing to query.
  return [] unless @sets;

  my $constraint =
    ( @sets > 1 )
    ? ' mfms.misc_set_id in ( ' . join( ',', @sets ) . ' ) '
    : " mfms.misc_set_id = $sets[0] ";

  # The maximum feature length is overridden for this one query only.  The
  # fetch runs inside eval so that the override is cleared even when the
  # fetch dies; otherwise it would leak into later queries made through
  # this adaptor.
  $self->_max_feature_length($max_len);

  my $results;
  my $fetched = eval {
    $results = $self->fetch_all_by_Slice_constraint( $slice, $constraint );
    1;
  };
  my $error = $@;    # saved before any further call can clobber $@

  $self->_max_feature_length(undef);

  die $error unless $fetched;

  return $results;
}
00125 
00126 
00127 
00128 =head2 fetch_all_by_attribute_type_value
00129 
00130   Arg [1]    : string $attrib_type_code
00131                The code of the attribute type to fetch features for
00132   Arg [2]    : (optional) string $attrib_value
00133                The value of the attribute to fetch features for
00134   Example    : 
00135          #get all misc features that have an embl accession
00136          @feats = @{$mfa->fetch_all_by_attribute_type_value('embl_acc')};
00137          #get the misc feature with synonym 'AL014121'
00138          ($feat)=@{$mfa->fetch_all_by_attribute_type_value('synonym','AL014121')};
00139   Description: Retrieves MiscFeatures which have a particular attribute.
00140                If the attribute value argument is also provided only
00141                features which have the attribute AND a particular value
00142                are returned.  The features are returned in their native
00143                coordinate system (i.e. the coordinate system that they
00144                are stored in).
00145   Returntype : listref of Bio::EnsEMBL::MiscFeatures
00146   Exceptions : throw if attrib_type code arg is not provided
00147   Caller     : general
00148   Status     : Stable
00149 
00150 =cut
00151 
# Looks up the ids of all misc features that have an attribute of the given
# type (and, when supplied, the given value) for this adaptor's species, then
# fetches those features through generic_fetch.
#
#   Arg [1]    : string $attrib_type_code (required)
#   Arg [2]    : (optional) string $attrib_value; undef or an empty string
#                means "any value"
#   Returns    : reference to a list of everything generic_fetch returned
#   Exceptions : throws if the attribute type code is missing
sub fetch_all_by_attribute_type_value {
  my $self             = shift;
  my $attrib_type_code = shift;
  my $attrib_value     = shift;

  throw("Attrib type code argument is required.")
    if ( !$attrib_type_code );

  # Filter on the value whenever one was actually supplied.  A plain truth
  # test would treat the legitimate value '0' as "no value" and return every
  # feature that has the attribute type.
  my $filter_by_value = defined($attrib_value) && length($attrib_value);

  # Need to do 2 queries so that all of the ids come back with the
  # features.  The problem with adding attrib constraints to filter the
  # misc_features which come back is that not all of the attributes will
  # come back

  my $sql = qq(
  SELECT DISTINCT
        ma.misc_feature_id
  FROM  misc_attrib ma,
        attrib_type at,
        misc_feature mf,
        seq_region sr,
        coord_system cs
  WHERE ma.attrib_type_id = at.attrib_type_id
    AND at.code = ?
    AND ma.misc_feature_id = mf.misc_feature_id
    AND mf.seq_region_id = sr.seq_region_id
    AND sr.coord_system_id = cs.coord_system_id
    AND cs.species_id = ?);

  $sql .= " AND ma.value = ?" if $filter_by_value;

  my $sth = $self->prepare($sql);

  $sth->bind_param( 1, $attrib_type_code,   SQL_VARCHAR );
  $sth->bind_param( 2, $self->species_id(), SQL_INTEGER );
  $sth->bind_param( 3, $attrib_value,       SQL_VARCHAR ) if $filter_by_value;

  $sth->execute();

  my @ids = map { $_->[0] } @{ $sth->fetchall_arrayref() };

  $sth->finish();

  # Fetch the features in groups of at most 1000 ids to ensure that no
  # single query is too big.
  my @results;
  while (@ids) {
    my @subset = splice( @ids, 0, 1000 );

    my $constraint =
      ( @subset == 1 )
      ? "mf.misc_feature_id = $subset[0]"
      : 'mf.misc_feature_id in (' . join( ',', @subset ) . ')';

    push @results, @{ $self->generic_fetch($constraint) };
  }

  return \@results;
} ## end sub fetch_all_by_attribute_type_value
00218 
00219 
# _tables
#
#  Arg [1]    : none
#  Example    : none
#  Description: PROTECTED implementation of the abstract superclass method.
#               Names the tables this adaptor queries, each paired with the
#               alias used for it in column names and constraints.
#  Returntype : list of [ table name, alias ] array references
#  Exceptions : none
#  Caller     : internal

sub _tables {
  my ($self) = @_;

  return (
    [ qw(misc_feature          mf)   ],
    [ qw(misc_feature_misc_set mfms) ],
    [ qw(misc_attrib           ma)   ],
    [ qw(attrib_type           at)   ]
  );
}
00239 
00240 
# _columns
#
#  Arg [1]    : none
#  Example    : none
#  Description: PROTECTED implementation of the abstract superclass method.
#               Names the columns to select, using the table aliases from
#               _tables.
#  Returntype : list of strings
#  Exceptions : none
#  Caller     : internal

sub _columns {
  my ($self) = @_;

  # The order matters: _objs_from_sth binds its variables to these columns
  # by position.
  return (
    'mf.misc_feature_id',
    'mf.seq_region_id',
    'mf.seq_region_start',
    'mf.seq_region_end',
    'mf.seq_region_strand',
    'ma.value',
    'at.code',
    'mfms.misc_set_id',
    'at.name',
    'at.description'
  );
}
00266 
00267 
00268 
# _default_where_clause
#
#  Arg [1]    : none
#  Example    : none
#  Description: Overrides the superclass method.  This adaptor contributes no
#               default constraint, so the clause is always empty.
#  Returntype : string (always empty)
#  Exceptions : none
#  Caller     : generic_fetch

sub _default_where_clause {
  my ($self) = @_;

  return q{};
}
00284 
00285 
# _left_join
#
#  Arg [1]    : none
#  Description: Lists the tables that are left-joined, each paired with its
#               join condition written in terms of the aliases from _tables.
#  Returntype : list of [ table name, join condition ] array references
#  Caller     : internal (presumably the superclass query builder)

sub _left_join {
  my ($self) = @_;

  # Column that the per-feature tables are joined against.
  my $feature_key = 'mf.misc_feature_id';

  return (
    [ 'misc_feature_misc_set', "$feature_key = mfms.misc_feature_id" ],
    [ 'misc_attrib',           "$feature_key = ma.misc_feature_id" ],
    [ 'attrib_type',           'ma.attrib_type_id = at.attrib_type_id' ]
  );
}
00294 
00295 
# _final_clause
#
#  Arg [1]    : none
#  Description: Supplies the trailing SQL that orders rows by misc feature id.
#               _objs_from_sth folds consecutive rows with the same id into
#               one feature.
#  Returntype : string

sub _final_clause {
  my ($self) = @_;

  my $order_column = 'mf.misc_feature_id';

  return " ORDER BY $order_column";
}
00301 
00302 
# _objs_from_sth
#
#  Arg [1]    : StatementHandle $sth
#               Handle whose columns are in the order given by _columns.
#               Consecutive rows with the same misc_feature_id are folded
#               into a single feature.
#  Arg [2]    : (optional) $mapper
#               When given, its fastmap method remaps each feature; features
#               for which it returns no seq_region id are dropped.
#  Arg [3]    : (optional) $dest_slice
#               When given, coordinates are made relative to this slice, and
#               features falling outside it or on another seq_region are
#               dropped.
#  Example    : none
#  Description: PROTECTED implementation of abstract superclass method.
#               Responsible for the creation of MiscFeatures, together with
#               their attributes and misc sets, from the rows of an executed
#               SQL statement.
#  Returntype : listref of Bio::EnsEMBL::MiscFeatures
#  Exceptions : none
#  Caller     : internal

sub _objs_from_sth {
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa  = $self->db()->get_SliceAdaptor();
  my $msa = $self->db()->get_MiscSetAdaptor();

  my @features;
  my %ms_hash;         # misc_set_id         => misc set object
  my %slice_hash;      # "ID:<seq_region_id>" => slice
  my %sr_name_hash;    # seq_region_id       => seq region name
  my %sr_cs_hash;      # seq_region_id       => coord system

  my($misc_feature_id, $seq_region_id, $seq_region_start, $seq_region_end,
     $seq_region_strand, $attrib_value, $attrib_type_code, $misc_set_id,
     $attrib_type_name, $attrib_type_description );

  $sth->bind_columns( \$misc_feature_id, \$seq_region_id, \$seq_region_start,
                      \$seq_region_end, \$seq_region_strand,
                      \$attrib_value, \$attrib_type_code, \$misc_set_id,
                      \$attrib_type_name, \$attrib_type_description );

  # Read the destination slice properties once, outside the row loop.
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_sr_id;
  if ($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
    $dest_slice_sr_id  = $dest_slice->get_seq_region_id();
  }

  my $current    = -1;    # id of the feature currently being built
  my $throw_away = -1;    # id of a feature whose remaining rows are skipped
  my $feat;
  my $feat_misc_sets;     # set code => misc set, for the current feature
  my $feat_attribs;       # the array stored as the feature's 'attributes'
  my $seen_attribs;       # "code:value" => 1, for the current feature

 FEATURE: while ($sth->fetch()) {
    #if this feature is not being used, skip all rows related to it
    next if ($throw_away == $misc_feature_id);

    # A row carries an attribute when it has a type code and a non-empty
    # value.  The value is tested with defined/length rather than for truth
    # so that an attribute whose value is '0' is not dropped.
    my $has_attrib = defined($attrib_value) && length($attrib_value)
      && $attrib_type_code;

    if ($current == $misc_feature_id) {
      #still working on building up attributes and sets for current feature

      #if there is a misc_set, add it to the current feature
      if ($misc_set_id) {
        my $misc_set = $ms_hash{$misc_set_id} ||=
          $msa->fetch_by_dbID($misc_set_id);
        if ( ! exists $feat_misc_sets->{$misc_set->{'code'}} ) {
          $feat->add_MiscSet( $misc_set );
          $feat_misc_sets->{$misc_set->{'code'}} = $misc_set;
        }
      }

      #if there is a new attribute add it to the current feature
      if ($has_attrib &&
          !$seen_attribs->{"$attrib_type_code:$attrib_value"}) {
        my $attrib = Bio::EnsEMBL::Attribute->new
          ( -CODE => $attrib_type_code,
            -NAME => $attrib_type_name,
            -DESC => $attrib_type_description,
            -VALUE => $attrib_value
          );

        # $feat_attribs is the array held by the feature itself, so the
        # attribute becomes visible on the feature.
        push @$feat_attribs, $attrib;
        $seen_attribs->{"$attrib_type_code:$attrib_value"} = 1;
      }

    } else {
      # Start working on a new feature.  The accumulators are always reset,
      # also for the very first feature, so that every feature owns an
      # attribute array even when its first row carries no attribute.
      $feat_attribs   = [];
      $feat_misc_sets = {};
      $seen_attribs   = {};

      $current = $misc_feature_id;
      #need to get the internal_seq_region, if present
      $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
      my $slice = $slice_hash{"ID:".$seq_region_id};

      if (!$slice) {
        $slice = $sa->fetch_by_seq_region_id($seq_region_id);
        $slice_hash{"ID:".$seq_region_id} = $slice;
        $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
        $sr_cs_hash{$seq_region_id} = $slice->coord_system();
      }

      my $sr_name = $sr_name_hash{$seq_region_id};
      my $sr_cs   = $sr_cs_hash{$seq_region_id};

      #
      # remap the feature coordinates to another coord system
      # if a mapper was provided
      #
      if ($mapper) {

        ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
          $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                           $seq_region_strand, $sr_cs);

        #skip features that map to gaps or coord system boundaries
        if (!defined($seq_region_id)) {
          $throw_away = $misc_feature_id;
          next FEATURE;
        }

        #get a slice in the coord system we just mapped to
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      }

      #
      # If a destination slice was provided convert the coords
      # If the dest_slice starts at 1 and is forward strand, nothing needs doing
      #
      if ($dest_slice) {
        if ($dest_slice_start != 1 || $dest_slice_strand != 1) {
          if ($dest_slice_strand == 1) {
            $seq_region_start = $seq_region_start - $dest_slice_start + 1;
            $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
          } else {
            # reverse strand slice: flip the coordinates around its end
            my $tmp_seq_region_start = $seq_region_start;
            $seq_region_start = $dest_slice_end - $seq_region_end + 1;
            $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
            $seq_region_strand *= -1;
          }
        }

        #throw away features off the end of the requested slice
        if ($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
            ( $dest_slice_sr_id ne $seq_region_id )) {
          #flag this feature as one to throw away
          $throw_away = $misc_feature_id;
          next FEATURE;
        }

        $slice = $dest_slice;
      }

      if ($has_attrib) {
        my $attrib = Bio::EnsEMBL::Attribute->new
          ( -CODE => $attrib_type_code,
            -NAME => $attrib_type_name,
            -DESC => $attrib_type_description,
            -VALUE => $attrib_value
          );
        push @$feat_attribs, $attrib;
        $seen_attribs->{"$attrib_type_code:$attrib_value"} = 1;
      }

      $feat =
        $self->_create_feature_fast( 'Bio::EnsEMBL::MiscFeature', {
                                       'start'   => $seq_region_start,
                                       'end'     => $seq_region_end,
                                       'strand'  => $seq_region_strand,
                                       'slice'   => $slice,
                                       'adaptor' => $self,
                                       'dbID'    => $misc_feature_id,
                                       'attributes' => $feat_attribs
                                     } );

      push @features, $feat;

      if ($misc_set_id) {
        #get the misc_set object
        my $misc_set = $ms_hash{$misc_set_id} ||=
          $msa->fetch_by_dbID($misc_set_id);
        if ( ! exists $feat_misc_sets->{$misc_set->{'code'}} ) {
          $feat->add_MiscSet( $misc_set );
          $feat_misc_sets->{$misc_set->{'code'}} = $misc_set;
        }
      }
    }
  }

  return \@features;
}
00527 
00528 
00529 
00530 =head2 list_dbIDs
00531 
00532   Arg [1]    : (optional) int $ordered
00533                Non-zero to have the ids sorted by the seq_region.
00534   Example    : @feature_ids = @{$misc_feature_adaptor->list_dbIDs()};
00535   Description: Gets an array of internal ids for all misc_features in the
00536                current db
00537   Returntype : listref of ints
00538   Exceptions : none
00539   Caller     : ?
00540   Status     : Stable
00541 
00542 =cut
00543 
# Lists the internal ids of the rows in the misc_feature table by delegating
# to _list_dbIDs.  The optional $ordered flag is passed through unchanged.
sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  my $table = 'misc_feature';

  return $self->_list_dbIDs( $table, undef, $ordered );
}
00549 
00550 
00551 =head2 store
00552 
00553   Arg [1]    : list of Bio::EnsEMBL::MiscFeatures @misc_features
00554   Example    : $misc_feature_adaptor->store(@misc_features);
00555   Description: Stores a list of MiscFeatures in this database.  The stored
00556                features will have their dbID and adaptor attributes set.
00557   Returntype : none
00558   Exceptions : throw on invalid arguments
00559                warning if misc feature is already stored in this database
00560                throw if start/end/strand attribs are not valid
00561   Caller     : general
00562   Status     : Stable
00563 
00564 =cut
00565 
# Stores each given MiscFeature: one row in misc_feature, its attributes via
# the AttributeAdaptor, and one misc_feature_misc_set row per misc set (sets
# that are not yet stored are stored first).
#
#   Arg [1...] : list of Bio::EnsEMBL::MiscFeature objects
#   Returns    : nothing
#   Exceptions : throws if an argument is not a Bio::EnsEMBL::MiscFeature;
#                warns and skips a feature already stored in this database
sub store {
  my $self          = shift;
  my @misc_features = @_;

  my $db = $self->db();

  my $feature_sth = $self->prepare
    ("INSERT INTO misc_feature SET " .
     " seq_region_id    = ?, " .
     " seq_region_start = ?, " .
     " seq_region_end   = ?, " .
     " seq_region_strand = ?");

  # INSERT IGNORE: a feature/set pair that already exists is left alone.
  my $feature_set_sth = $self->prepare
    ("INSERT IGNORE misc_feature_misc_set SET " .
     " misc_feature_id = ?, " .
     " misc_set_id = ?");

  my $msa = $db->get_MiscSetAdaptor();
  my $aa  = $db->get_AttributeAdaptor();

 FEATURE:
  foreach my $mf (@misc_features) {
    if (!ref($mf) || !$mf->isa('Bio::EnsEMBL::MiscFeature')) {
      throw("List of MiscFeature arguments expeceted");
    }

    if ($mf->is_stored($db)) {
      warning("MiscFeature [" .$mf->dbID."] is already stored in database.");
      next FEATURE;
    }

    # do some checking of the start/end and convert to seq_region coords
    my $original = $mf;
    my $seq_region_id;
    ($mf, $seq_region_id) = $self->_pre_store($mf);

    # store the actual MiscFeature
    $feature_sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $feature_sth->bind_param(2,$mf->start,SQL_INTEGER);
    $feature_sth->bind_param(3,$mf->end,SQL_INTEGER);
    $feature_sth->bind_param(4,$mf->strand,SQL_TINYINT);
    $feature_sth->execute();

    my $dbID = $feature_sth->{'mysql_insertid'};

    $mf->dbID($dbID);
    $mf->adaptor($self);

    # _pre_store replaces $mf, presumably with a converted copy when the
    # feature was not already in seq_region coordinates (TODO confirm).  The
    # object the caller passed in must be marked as stored as well, or a
    # second store() call on it would insert a duplicate row.
    $original->dbID($dbID);
    $original->adaptor($self);

    # store all the attributes
    my $attribs = $mf->get_all_Attributes();
    $aa->store_on_MiscFeature($mf, $attribs);

    # store all the sets that have not been stored yet
    my $sets = $mf->get_all_MiscSets();
    foreach my $set (@$sets) {
      $msa->store($set) if (!$set->is_stored($db));

      # update the misc_feat_misc_set table to store the set relationship
      $feature_set_sth->bind_param(1,$dbID,SQL_INTEGER);
      $feature_set_sth->bind_param(2,$set->dbID,SQL_INTEGER);

      $feature_set_sth->execute();
    }
  }

  return;
}
00634 
00635 1;
00636 
00637 
00638 
00639 
00640