Archive Ensembl HomeArchive Ensembl Home
ProteinAlignFeatureAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor - 
00024 Adaptor for ProteinAlignFeatures
00025 
00026 =head1 SYNOPSIS
00027 
00028   $pafa =
00029     $registry->get_adaptor( 'Human', 'Core', 'ProteinAlignFeature' );
00030 
00031   my @features = @{ $pafa->fetch_all_by_Slice($slice) };
00032 
00033   $pafa->store(@features);
00034 
00035 =head1 METHODS
00036 
00037 =cut
00038 
00039 
00040 package Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor;
00041 use vars qw(@ISA);
00042 use strict;
00043 
00044 use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
00045 use Bio::EnsEMBL::DnaPepAlignFeature;
00046 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00047 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
00048 
00049 
00050 =head2 store
00051 
00052   Arg [1]    : list of Bio::EnsEMBL::DnaPepAlignFeature @feats
00053   Example    : $protein_align_feature_adaptor->store(@feats);
00054   Description: stores a list of ProteinAlignFeatures in the database
00055   Returntype : none
00056   Exceptions : throw if any of the provided features cannot be stored
00057                which may occur if:
00058                  * The feature does not have an associated Slice
00059                  * The feature does not have an associated analysis
00060                  * The Slice the feature is associated with is on a seq_region
00061                    unknown to this database
00062               A warning is given if:
00063                  * The feature has already been stored in this db
00064   Caller     : Pipeline
00065   Status     : Stable
00066 
00067 =cut
00068 
00069 
00070 sub store{
00071  my ($self, @feats) = @_;
00072 
00073   throw("Must call store with features") if( scalar(@feats) == 0 );
00074 
00075   my @tabs = $self->_tables;
00076   my ($tablename) = @{$tabs[0]};
00077 
00078   my $db = $self->db();
00079   my $slice_adaptor = $db->get_SliceAdaptor();
00080   my $analysis_adaptor = $db->get_AnalysisAdaptor();
00081 
00082   my $sth = $self->prepare(
00083      "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
00084                              seq_region_strand, hit_start, hit_end,
00085                              hit_name, cigar_line,
00086                              analysis_id, score, evalue, perc_ident, external_db_id, hcoverage)
00087      VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?)");
00088 
00089  FEATURE: foreach my $feat ( @feats ) {
00090    if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
00091      throw("feature must be a Bio::EnsEMBL::DnaPepAlignFeature,"
00092            . " not a [".ref($feat)."].");
00093    }
00094 
00095    if($feat->is_stored($db)) {
00096      warning("PepDnaAlignFeature [".$feat->dbID."] is already stored" .
00097              " in this database.");
00098      next FEATURE;
00099    }
00100 
00101    #sanity check the hstart and hend
00102    my $hstart  = $feat->hstart();
00103    my $hend    = $feat->hend();
00104    $self->_check_start_end_strand($hstart,$hend,1);
00105 
00106    my $cigar_string = $feat->cigar_string();
00107    if(!$cigar_string) {
00108      $cigar_string = $feat->length() . 'M';
00109      warning("DnaPepAlignFeature does not define a cigar_string.\n" .
00110              "Assuming ungapped block with cigar_string=$cigar_string\n");
00111    }
00112 
00113    my $hseqname = $feat->hseqname();
00114    if(!$hseqname) {
00115      throw("DnaPepAlignFeature must define an hseqname.");
00116    }
00117 
00118    if(!defined($feat->analysis)) {
00119      throw("An analysis must be attached to the features to be stored.");
00120    }
00121 
00122    #store the analysis if it has not been stored yet
00123    if(!$feat->analysis->is_stored($db)) {
00124      $analysis_adaptor->store($feat->analysis());
00125    }
00126 
00127    my $slice = $feat->slice();
00128    if(!defined($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
00129      throw("A slice must be attached to the features to be stored.");
00130    }
00131 
00132    my $original = $feat;
00133    my $seq_region_id;
00134    ($feat, $seq_region_id) = $self->_pre_store($feat);
00135 
00136    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
00137    $sth->bind_param(2,$feat->start,SQL_INTEGER);
00138    $sth->bind_param(3,$feat->end,SQL_INTEGER);
00139    $sth->bind_param(4,$feat->strand,SQL_TINYINT);
00140    $sth->bind_param(5,$feat->hstart,SQL_INTEGER);
00141    $sth->bind_param(6,$feat->hend,SQL_INTEGER);
00142    $sth->bind_param(7,$feat->hseqname,SQL_VARCHAR);
00143    $sth->bind_param(8,$feat->cigar_string,SQL_LONGVARCHAR);
00144    $sth->bind_param(9,$feat->analysis->dbID,SQL_INTEGER);
00145    $sth->bind_param(10,$feat->score,SQL_DOUBLE);
00146    $sth->bind_param(11,$feat->p_value,SQL_DOUBLE);
00147    $sth->bind_param(12,$feat->percent_id,SQL_REAL);
00148    $sth->bind_param(13,$feat->external_db_id,SQL_INTEGER);
00149    $sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE);
00150 
00151    $sth->execute();
00152    $original->dbID($sth->{'mysql_insertid'});
00153    $original->adaptor($self);
00154  }
00155 
00156  $sth->finish();
00157 }
00158 
00159 
00160 =head2 _objs_from_sth
00161 
00162   Arg [1]    : DBI statement handle $sth
00163                an exectuted DBI statement handle generated by selecting 
00164                the columns specified by _columns() from the table specified 
00165                by _table()
00166   Example    : @dna_dna_align_feats = $self->_obj_from_hashref
00167   Description: PROTECTED implementation of superclass abstract method. 
00168                Creates DnaDnaAlignFeature objects from a DBI hashref
00169   Returntype : listref of Bio::EnsEMBL::ProteinAlignFeatures
00170   Exceptions : none
00171   Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
00172   Status     : Stable
00173 
00174 =cut
00175 
00176 sub _objs_from_sth {
00177   my ($self, $sth, $mapper, $dest_slice) = @_;
00178 
00179   #
00180   # This code is ugly because an attempt has been made to remove as many
00181   # function calls as possible for speed purposes.  Thus many caches and
00182   # a fair bit of gymnastics is used.
00183   #
00184 
00185   my $sa = $self->db()->get_SliceAdaptor();
00186   my $aa = $self->db->get_AnalysisAdaptor();
00187 
00188   my @features;
00189   my %analysis_hash;
00190   my %slice_hash;
00191   my %sr_name_hash;
00192   my %sr_cs_hash;
00193 
00194   my ($protein_align_feature_id, $seq_region_id, $seq_region_start,
00195       $seq_region_end, $analysis_id, $seq_region_strand, $hit_start,
00196       $hit_end, $hit_name, $cigar_line, $evalue, $perc_ident, $score,
00197       $external_db_id, $hcoverage, $external_db_name, $external_display_db_name );
00198 
00199   $sth->bind_columns(\$protein_align_feature_id, \$seq_region_id,
00200            \$seq_region_start,\$seq_region_end, \$analysis_id,
00201            \$seq_region_strand, \$hit_start,\$hit_end, \$hit_name,
00202            \$cigar_line, \$evalue, \$perc_ident, \$score,
00203            \$external_db_id, \$hcoverage, \$external_db_name, \$external_display_db_name );
00204 
00205   my $asm_cs;
00206   my $cmp_cs;
00207   my $asm_cs_vers;
00208   my $asm_cs_name;
00209   my $cmp_cs_vers;
00210   my $cmp_cs_name;
00211   if($mapper) {
00212     $asm_cs = $mapper->assembled_CoordSystem();
00213     $cmp_cs = $mapper->component_CoordSystem();
00214     $asm_cs_name = $asm_cs->name();
00215     $asm_cs_vers = $asm_cs->version();
00216     $cmp_cs_name = $cmp_cs->name();
00217     $cmp_cs_vers = $cmp_cs->version();
00218   }
00219 
00220   my $dest_slice_start;
00221   my $dest_slice_end;
00222   my $dest_slice_strand;
00223   my $dest_slice_length;
00224   my $dest_slice_sr_name;
00225   my $dest_slice_sr_id;
00226   if($dest_slice) {
00227     $dest_slice_start  = $dest_slice->start();
00228     $dest_slice_end    = $dest_slice->end();
00229     $dest_slice_strand = $dest_slice->strand();
00230     $dest_slice_length = $dest_slice->length();
00231     $dest_slice_sr_name = $dest_slice->seq_region_name();
00232     $dest_slice_sr_id  = $dest_slice->get_seq_region_id();
00233   }
00234 
00235   FEATURE: while($sth->fetch()) {
00236     #get the analysis object
00237     my $analysis = $analysis_hash{$analysis_id} ||=
00238       $aa->fetch_by_dbID($analysis_id);
00239 
00240     #get the slice object
00241     my $slice = $slice_hash{"ID:".$seq_region_id};
00242 
00243     if(!$slice) {
00244       $slice = $sa->fetch_by_seq_region_id($seq_region_id);
00245       $slice_hash{"ID:".$seq_region_id} = $slice;
00246       $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
00247       $sr_cs_hash{$seq_region_id} = $slice->coord_system();
00248     }
00249 
00250     my $sr_name = $sr_name_hash{$seq_region_id};
00251     my $sr_cs   = $sr_cs_hash{$seq_region_id};
00252     #
00253     # remap the feature coordinates to another coord system
00254     # if a mapper was provided
00255     #
00256     if($mapper) {
00257 
00258       ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
00259         $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
00260                           $seq_region_strand, $sr_cs);
00261 
00262       #skip features that map to gaps or coord system boundaries
00263       next FEATURE if(!defined($seq_region_id));
00264 
00265  #     #get a slice in the coord system we just mapped to
00266  #     if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
00267         $slice = $slice_hash{"ID:".$seq_region_id} ||=
00268           $sa->fetch_by_seq_region_id($seq_region_id);
00269 #      } else {
00270 #        $slice = $slice_hash{"ID:".$seq_region_id} ||=
00271 #          $sa->fetch_by_seq_region_id($asm_cs_name, $sr_name, undef, undef, undef,
00272 #                               $asm_cs_vers);
00273 #      }
00274     }
00275 
00276     #
00277     # If a destination slice was provided convert the coords
00278     # If the dest_slice starts at 1 and is foward strand, nothing needs doing
00279     #
00280     if($dest_slice) {
00281       if($dest_slice_start != 1 || $dest_slice_strand != 1) {
00282         if($dest_slice_strand == 1) {
00283           $seq_region_start = $seq_region_start - $dest_slice_start + 1;
00284           $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
00285         } else {
00286           my $tmp_seq_region_start = $seq_region_start;
00287           $seq_region_start = $dest_slice_end - $seq_region_end + 1;
00288           $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
00289           $seq_region_strand *= -1;
00290         }
00291       }
00292        
00293       #throw away features off the end of the requested slice
00294       if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
00295     ( $dest_slice_sr_id ne $seq_region_id )) {
00296     next FEATURE;
00297       }
00298       $slice = $dest_slice;
00299     }
00300 
00301     # Finally, create the new ProteinAlignFeature.
00302     push(
00303       @features,
00304       $self->_create_feature_fast(
00305         'Bio::EnsEMBL::DnaPepAlignFeature', {
00306           'slice'        => $slice,
00307           'start'        => $seq_region_start,
00308           'end'          => $seq_region_end,
00309           'strand'       => $seq_region_strand,
00310           'hseqname'     => $hit_name,
00311           'hstart'       => $hit_start,
00312           'hend'         => $hit_end,
00313           'hstrand'      => 1,                  # dna_pep_align features
00314                                                 # are always hstrand 1
00315           'score'        => $score,
00316           'p_value'      => $evalue,
00317           'percent_id'   => $perc_ident,
00318           'cigar_string' => $cigar_line,
00319           'analysis'     => $analysis,
00320           'adaptor'      => $self,
00321           'dbID'           => $protein_align_feature_id,
00322           'external_db_id' => $external_db_id,
00323           'hcoverage'      => $hcoverage,
00324       'dbname'         => $external_db_name,
00325       'db_display_name' => $external_display_db_name
00326         } ) );
00327 
00328   }
00329 
00330   return \@features;
00331 }
00332 
00333 
00334 
00335 sub _tables {
00336   my $self = shift;
00337 
00338   return (['protein_align_feature', 'paf'], ['external_db','exdb']);
00339 }
00340 
00341 
00342 sub _columns {
00343   my $self = shift;
00344 
00345   #warning _objs_from_hashref method depends on ordering of this list 
00346   return qw( paf.protein_align_feature_id
00347              paf.seq_region_id
00348              paf.seq_region_start
00349              paf.seq_region_end
00350              paf.analysis_id
00351              paf.seq_region_strand
00352              paf.hit_start
00353              paf.hit_end
00354              paf.hit_name
00355              paf.cigar_line
00356              paf.evalue
00357              paf.perc_ident
00358              paf.score
00359              paf.external_db_id
00360              paf.hcoverage
00361          exdb.db_name
00362          exdb.db_display_name);
00363 }
00364 
00365 sub _left_join{
00366     return (['external_db',"exdb.external_db_id = paf.external_db_id"]);
00367 }
00368 
00369 =head2 list_dbIDs
00370 
00371   Arg [1]    : none
00372   Example    : @feature_ids = @{$protein_align_feature_adaptor->list_dbIDs()};
00373   Description: Gets an array of internal ids for all protein align 
00374                features in the current db
00375   Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
00376   Returntype : listref of ints
00377   Exceptions : none
00378   Caller     : ?
00379   Status     : Stable
00380 
00381 =cut
00382 
00383 sub list_dbIDs {
00384    my ($self,$ordered) = @_;
00385 
00386    return $self->_list_dbIDs("protein_align_feature", undef, $ordered);
00387 }
00388 
00389 
00390 
00391 
00392 
00393 
00394 1;