Archive Ensembl HomeArchive Ensembl Home
DnaAlignFeatureAdaptor.pm
Go to the documentation of this file.
00001 =head1 LICENSE
00002 
00003   Copyright (c) 1999-2012 The European Bioinformatics Institute and
00004   Genome Research Limited.  All rights reserved.
00005 
00006   This software is distributed under a modified Apache license.
00007   For license details, please see
00008 
00009     http://www.ensembl.org/info/about/code_licence.html
00010 
00011 =head1 CONTACT
00012 
00013   Please email comments or questions to the public Ensembl
00014   developers list at <dev@ensembl.org>.
00015 
00016   Questions may also be sent to the Ensembl help desk at
00017   <helpdesk@ensembl.org>.
00018 
00019 =cut
00020 
00021 =head1 NAME
00022 
00023 Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures
00024 
00025 =head1 SYNOPSIS
00026 
00027   $dafa = $registry->get_adaptor( 'Human', 'Core', 'DnaAlignFeature' );
00028 
00029   my @features = @{ $dafa->fetch_all_by_Slice($slice) };
00030 
00031   $dafa->store(@features);
00032 
00033 =head1 DESCRIPTION
00034 
00035 This is an adaptor responsible for the retrieval and storage of
00036 DnaDnaAlignFeatures from the database. This adaptor inherits most of its
00037 functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor
00038 superclasses.
00039 
00040 =head1 METHODS
00041 
00042 =cut
00043 
00044 
00045 package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor;
00046 use vars qw(@ISA);
00047 use strict;
00048 use Bio::EnsEMBL::DnaDnaAlignFeature;
00049 use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
00050 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00051 
00052 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
00053 
00054 
00055 =head2 _tables
00056 
00057   Args       : none
00058   Example    : @tabs = $self->_tables
00059   Description: PROTECTED implementation of the abstract method inherited from
00060                BaseFeatureAdaptor.  Returns list of [tablename, alias] pairs
00061   Returntype : list of listrefs of strings
00062   Exceptions : none
00063   Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
00064   Status     : Stable
00065 
00066 =cut
00067 
00068 sub _tables {
00069   my $self = shift;
00070 
00071   return (['dna_align_feature', 'daf'],['external_db','exdb']);
00072 }
00073 
00074 
00075 sub _left_join{
00076     return (['external_db',"exdb.external_db_id = daf.external_db_id"]);
00077 }
00078 
00079 =head2 _columns
00080 
00081   Args       : none
00082   Example    : @columns = $self->_columns
00083   Description: PROTECTED implementation of abstract superclass method.  
00084                Returns a list of columns that are needed for object creation.
00085   Returntype : list of strings
00086   Exceptions : none
00087   Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
00088   Status     : Stable
00089 
00090 =cut
00091 
00092 sub _columns {
00093   my $self = shift;
00094 
00095   #warning, implementation of _objs_from_sth method depends on order of list
00096   return qw(daf.dna_align_feature_id
00097             daf.seq_region_id
00098             daf.analysis_id
00099             daf.seq_region_start
00100             daf.seq_region_end
00101             daf.seq_region_strand
00102             daf.hit_start
00103             daf.hit_end
00104             daf.hit_name
00105             daf.hit_strand
00106             daf.cigar_line
00107             daf.evalue
00108             daf.perc_ident
00109             daf.score
00110             daf.external_db_id
00111             daf.hcoverage
00112         daf.external_data
00113         daf.pair_dna_align_feature_id
00114         exdb.db_name
00115         exdb.db_display_name);
00116 }
00117 
00118 
00119 =head2 store
00120 
00121   Arg [1]    : list of Bio::EnsEMBL::DnaAlignFeatures @feats
00122                the features to store in the database
00123   Example    : $dna_align_feature_adaptor->store(@features);
00124   Description: Stores a list of DnaAlignFeatures in the database
00125   Returntype : none
00126   Exceptions : throw if any of the provided features cannot be stored
00127                which may occur if:
00128                  * The feature does not have an associate Slice
00129                  * The feature does not have an associated analysis
00130                  * The Slice the feature is associated with is on a seq_region
00131                    unknown to this database
00132                A warning is given if:
00133                  * The feature has already been stored in this db
00134   Caller     : Pipeline
00135   Status     : Stable
00136 
00137 =cut
00138 
00139 sub store {
00140   my ( $self, @feats ) = @_;
00141 
00142   throw("Must call store with features") if ( scalar(@feats) == 0 );
00143 
00144   my @tabs = $self->_tables;
00145   my ($tablename) = @{ $tabs[0] };
00146 
00147   my $db               = $self->db();
00148   my $analysis_adaptor = $db->get_AnalysisAdaptor();
00149 
00150   my $sth = $self->prepare(
00151     "INSERT INTO $tablename (seq_region_id, seq_region_start,
00152                              seq_region_end, seq_region_strand,
00153                              hit_start, hit_end, hit_strand, hit_name,
00154                              cigar_line, analysis_id, score, evalue,
00155                              perc_ident, external_db_id, hcoverage,
00156                              pair_dna_align_feature_id)
00157      VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"    # 16 arguments
00158   );
00159 
00160 FEATURE:
00161   foreach my $feat (@feats) {
00162     if ( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") )
00163     {
00164       throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
00165           . " not a ["
00166           . ref($feat)
00167           . "]." );
00168     }
00169 
00170     if ( $feat->is_stored($db) ) {
00171       warning( "DnaDnaAlignFeature ["
00172           . $feat->dbID()
00173           . "] is already stored in this database." );
00174       next FEATURE;
00175     }
00176 
00177     my $hstart  = $feat->hstart();
00178     my $hend    = $feat->hend();
00179     my $hstrand = $feat->hstrand();
00180     $self->_check_start_end_strand( $hstart, $hend, $hstrand );
00181 
00182     my $cigar_string = $feat->cigar_string();
00183     if ( !$cigar_string ) {
00184       $cigar_string = $feat->length() . 'M';
00185       warning( "DnaDnaAlignFeature does not define a cigar_string.\n"
00186           . "Assuming ungapped block with cigar_line=$cigar_string ." );
00187     }
00188 
00189     my $hseqname = $feat->hseqname();
00190     if ( !$hseqname ) {
00191       throw("DnaDnaAlignFeature must define an hseqname.");
00192     }
00193 
00194     if ( !defined( $feat->analysis ) ) {
00195       throw(
00196         "An analysis must be attached to the features to be stored.");
00197     }
00198 
00199     #store the analysis if it has not been stored yet
00200     if ( !$feat->analysis->is_stored($db) ) {
00201       $analysis_adaptor->store( $feat->analysis() );
00202     }
00203 
00204     my $original = $feat;
00205     my $seq_region_id;
00206     ( $feat, $seq_region_id ) = $self->_pre_store($feat);
00207 
00208     $sth->bind_param( 1,  $seq_region_id,        SQL_INTEGER );
00209     $sth->bind_param( 2,  $feat->start,          SQL_INTEGER );
00210     $sth->bind_param( 3,  $feat->end,            SQL_INTEGER );
00211     $sth->bind_param( 4,  $feat->strand,         SQL_TINYINT );
00212     $sth->bind_param( 5,  $hstart,               SQL_INTEGER );
00213     $sth->bind_param( 6,  $hend,                 SQL_INTEGER );
00214     $sth->bind_param( 7,  $hstrand,              SQL_TINYINT );
00215     $sth->bind_param( 8,  $hseqname,             SQL_VARCHAR );
00216     $sth->bind_param( 9,  $cigar_string,         SQL_LONGVARCHAR );
00217     $sth->bind_param( 10, $feat->analysis->dbID, SQL_INTEGER );
00218     $sth->bind_param( 11, $feat->score,          SQL_DOUBLE );
00219     $sth->bind_param( 12, $feat->p_value,        SQL_DOUBLE );
00220     $sth->bind_param( 13, $feat->percent_id,     SQL_FLOAT );
00221     $sth->bind_param( 14, $feat->external_db_id, SQL_INTEGER );
00222     $sth->bind_param( 15, $feat->hcoverage,      SQL_DOUBLE );
00223     $sth->bind_param( 16, $feat->pair_dna_align_feature_id,
00224       SQL_INTEGER );
00225 
00226     $sth->execute();
00227 
00228     $original->dbID( $sth->{'mysql_insertid'} );
00229     $original->adaptor($self);
00230   } ## end foreach my $feat (@feats)
00231 
00232   $sth->finish();
00233 } ## end sub store
00234 
00235 
00236 sub save {
00237   my ($self, $features) = @_;
00238 
00239   my @feats = @$features;
00240   throw("Must call store with features") if( scalar(@feats) == 0 );
00241 
00242   my @tabs = $self->_tables;
00243   my ($tablename) = @{$tabs[0]};
00244 
00245   my $db = $self->db();
00246   my $analysis_adaptor = $db->get_AnalysisAdaptor();
00247 
00248   my $sql = qq{INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end, seq_region_strand, hit_start, hit_end, hit_strand, hit_name, cigar_line, analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, pair_dna_align_feature_id, external_data) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)};
00249 
00250   my %analyses = ();
00251 
00252   my $sth = $self->prepare($sql);
00253      
00254  FEATURE: foreach my $feat ( @feats ) {
00255     if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
00256       throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
00257             . " not a [".ref($feat)."].");
00258     }
00259 
00260     if($feat->is_stored($db)) {
00261       warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
00262               " in this database.");
00263       next FEATURE;
00264     }
00265 
00266     my $hstart  = $feat->hstart || 0; # defined $feat->hstart  ? $feat->hstart : $feat->start ;
00267     my $hend    = $feat->hend   || 0; # defined $feat->hend    ? $feat->hend : $feat->end;
00268     my $hstrand = $feat->hstrand|| 0; # defined $feat->hstrand ? $feat->hstrand : $feat->strand;
00269     if( $hstart && $hend ) {
00270       if($hend < $hstart) {
00271         throw("Invalid Feature start/end [$hstart/$hend]. Start must be less than or equal to end.");
00272       }
00273     }
00274     my $cigar_string = $feat->cigar_string();
00275     if(!$cigar_string) {
00276       $cigar_string = $feat->length() . 'M';
00277       warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
00278               "Assuming ungapped block with cigar_line=$cigar_string .");
00279     }
00280 
00281     my $hseqname = $feat->hseqname();
00282     if(!$hseqname) {
00283       throw("DnaDnaAlignFeature must define an hseqname.");
00284     }
00285 
00286     if(!defined($feat->analysis)) {
00287       throw("An analysis must be attached to the features to be stored.");
00288     }
00289 
00290     #store the analysis if it has not been stored yet
00291     if(!$feat->analysis->is_stored($db)) {
00292       $analysis_adaptor->store($feat->analysis());
00293     }
00294 
00295     $analyses{ $feat->analysis->dbID }++;
00296 
00297     my $original = $feat;
00298     my $seq_region_id;
00299     ($feat, $seq_region_id) = $self->_pre_store_userdata($feat);
00300 
00301     my $extra_data = $feat->extra_data ? $self->dump_data($feat->extra_data) : '';
00302 
00303     $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
00304     $sth->bind_param(2,$feat->start,SQL_INTEGER);
00305     $sth->bind_param(3,$feat->end,SQL_INTEGER);
00306     $sth->bind_param(4,$feat->strand,SQL_TINYINT);
00307     $sth->bind_param(5,$hstart,SQL_INTEGER);
00308     $sth->bind_param(6,$hend,SQL_INTEGER);
00309     $sth->bind_param(7,$hstrand,SQL_TINYINT);
00310     $sth->bind_param(8,$hseqname,SQL_VARCHAR);
00311     $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR);
00312     $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER);
00313     $sth->bind_param(11,$feat->score,SQL_DOUBLE);
00314 #    $sth->bind_param(11,$feat->score); # if the above statement does not work it means you need to upgrade DBD::mysql, meantime you can replace it with this line
00315     $sth->bind_param(12,$feat->p_value,SQL_DOUBLE);
00316     $sth->bind_param(13,$feat->percent_id,SQL_FLOAT);
00317     $sth->bind_param(14,$feat->external_db_id,SQL_INTEGER);
00318     $sth->bind_param(15,$feat->hcoverage,SQL_DOUBLE);
00319     $sth->bind_param(16,$feat->pair_dna_align_feature_id,SQL_INTEGER);
00320     $sth->bind_param(17,$extra_data,SQL_LONGVARCHAR);
00321 
00322 
00323     $sth->execute();
00324     $original->dbID($sth->{'mysql_insertid'});
00325     $original->adaptor($self);
00326   }
00327 
00328   $sth->finish();
00329 
00330 ## js5 hack to update meta_coord table... 
00331   if( keys %analyses ) {
00332 
00333     my $sth = $self->prepare( 'select sr.coord_system_id, max(daf.seq_region_end-daf.seq_region_start) from seq_region as sr, dna_align_feature as daf where daf.seq_region_id=sr.seq_region_id and analysis_id in ('.join(',',keys %analyses).') group by coord_system_id' );
00334     $sth->execute;
00335 
00336     foreach( @{ $sth->fetchall_arrayref } ) {
00337       my $sth2 = $self->prepare( qq(insert ignore into meta_coord values("dna_align_feature",$_->[0],$_->[1])) );
00338       $sth2->execute;
00339       $sth2->finish;
00340 
00341       $sth2 = $self->prepare( qq(update meta_coord set max_length = $_->[1] where coord_system_id = $_->[0] and table_name="dna_align_feature" and max_length < $_->[1]) );
00342       $sth2->execute;
00343       $sth2->finish;
00344     }
00345 
00346     $sth->finish;
00347   }
00348 
00349 }
00350 
00351 
00352 =head2 _objs_from_sth
00353 
00354   Arg [1]    : DBI statement handle $sth
00355                an exectuted DBI statement handle generated by selecting 
00356                the columns specified by _columns() from the table specified 
00357                by _table()
00358   Example    : @dna_dna_align_feats = $self->_obj_from_hashref
00359   Description: PROTECTED implementation of superclass abstract method. 
00360                Creates DnaDnaAlignFeature objects from a DBI hashref
00361   Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
00362   Exceptions : none
00363   Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
00364   Status     : Stable
00365 
00366 =cut
00367 
00368 sub _objs_from_sth {
00369   my ( $self, $sth, $mapper, $dest_slice ) = @_;
00370 
00371   #
00372   # This code is ugly because an attempt has been made to remove as many
00373   # function calls as possible for speed purposes.  Thus many caches and
00374   # a fair bit of gymnastics is used.
00375   #
00376 
00377   # In case of userdata we need the features on the dest_slice.  In case
00378   # of get_all_supporting_features dest_slice is not provided.
00379   my $sa = (   $dest_slice
00380              ? $dest_slice->adaptor()
00381              : $self->db()->get_SliceAdaptor() );
00382   my $aa = $self->db->get_AnalysisAdaptor();
00383 
00384   my @features;
00385   my %analysis_hash;
00386   my %slice_hash;
00387   my %sr_name_hash;
00388   my %sr_cs_hash;
00389 
00390   my ( $dna_align_feature_id, $seq_region_id,
00391        $analysis_id,          $seq_region_start,
00392        $seq_region_end,       $seq_region_strand,
00393        $hit_start,            $hit_end,
00394        $hit_name,             $hit_strand,
00395        $cigar_line,           $evalue,
00396        $perc_ident,           $score,
00397        $external_db_id,       $hcoverage,
00398        $extra_data,           $pair_dna_align_feature_id,
00399        $external_db_name,     $external_display_db_name );
00400 
00401   $sth->bind_columns( \( $dna_align_feature_id, $seq_region_id,
00402                          $analysis_id,          $seq_region_start,
00403                          $seq_region_end,       $seq_region_strand,
00404                          $hit_start,            $hit_end,
00405                          $hit_name,             $hit_strand,
00406                          $cigar_line,           $evalue,
00407                          $perc_ident,           $score,
00408                          $external_db_id,       $hcoverage,
00409                          $extra_data,       $pair_dna_align_feature_id,
00410                          $external_db_name, $external_display_db_name )
00411   );
00412 
00413   my $asm_cs;
00414   my $cmp_cs;
00415   my $asm_cs_vers;
00416   my $asm_cs_name;
00417   my $cmp_cs_vers;
00418   my $cmp_cs_name;
00419 
00420   if ( defined($mapper) ) {
00421     $asm_cs      = $mapper->assembled_CoordSystem();
00422     $cmp_cs      = $mapper->component_CoordSystem();
00423     $asm_cs_name = $asm_cs->name();
00424     $asm_cs_vers = $asm_cs->version();
00425     $cmp_cs_name = $cmp_cs->name();
00426     $cmp_cs_vers = $cmp_cs->version();
00427   }
00428 
00429   my $dest_slice_start;
00430   my $dest_slice_end;
00431   my $dest_slice_strand;
00432   my $dest_slice_length;
00433   my $dest_slice_sr_name;
00434   my $dest_slice_seq_region_id;
00435 
00436   if ( defined($dest_slice) ) {
00437     $dest_slice_start         = $dest_slice->start();
00438     $dest_slice_end           = $dest_slice->end();
00439     $dest_slice_strand        = $dest_slice->strand();
00440     $dest_slice_length        = $dest_slice->length();
00441     $dest_slice_sr_name       = $dest_slice->seq_region_name();
00442     $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
00443   }
00444 
00445 FEATURE:
00446   while ( $sth->fetch() ) {
00447     # Get the analysis object.
00448     my $analysis = $analysis_hash{$analysis_id} ||=
00449       $aa->fetch_by_dbID($analysis_id);
00450 
00451     # Get the slice object.
00452     my $slice = $slice_hash{ "ID:" . $seq_region_id };
00453 
00454     if ( !defined($slice) ) {
00455       $slice = $sa->fetch_by_seq_region_id($seq_region_id);
00456       if ( defined($slice) ) {
00457         $slice_hash{ "ID:" . $seq_region_id } = $slice;
00458         $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
00459         $sr_cs_hash{$seq_region_id}   = $slice->coord_system();
00460       }
00461     }
00462 
00463     my $sr_name = $sr_name_hash{$seq_region_id};
00464     my $sr_cs   = $sr_cs_hash{$seq_region_id};
00465 
00466     # Remap the feature coordinates to another coord system
00467     # if a mapper was provided.
00468     if ( defined($mapper) ) {
00469       ( $seq_region_id,  $seq_region_start,
00470         $seq_region_end, $seq_region_strand )
00471         =
00472         $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
00473                           $seq_region_strand, $sr_cs );
00474 
00475       # Skip features that map to gaps or coord system boundaries.
00476       if ( !defined($seq_region_id) ) { next FEATURE }
00477 
00478       # Get a slice in the coord system we just mapped to.
00479       if ( $asm_cs == $sr_cs
00480            || ( $cmp_cs != $sr_cs && $asm_cs->equals($sr_cs) ) )
00481       {
00482         $slice = $slice_hash{ "ID:" . $seq_region_id } ||=
00483           $sa->fetch_by_seq_region_id($seq_region_id);
00484       } else {
00485         $slice = $slice_hash{ "ID:" . $seq_region_id } ||=
00486           $sa->fetch_by_seq_region_id($seq_region_id);
00487       }
00488     }
00489 
00490     # If a destination slice was provided, convert the coords.  If the
00491     # dest_slice starts at 1 and is forward strand, nothing needs doing.
00492     if ( defined($dest_slice) ) {
00493       if ( $dest_slice_start != 1 || $dest_slice_strand != 1 ) {
00494         if ( $dest_slice_strand == 1 ) {
00495           $seq_region_start = $seq_region_start - $dest_slice_start + 1;
00496           $seq_region_end   = $seq_region_end - $dest_slice_start + 1;
00497         } else {
00498           my $tmp_seq_region_start = $seq_region_start;
00499           $seq_region_start = $dest_slice_end - $seq_region_end + 1;
00500           $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
00501           $seq_region_strand = -$seq_region_strand;
00502         }
00503 
00504         # Throw away features off the end of the requested slice.
00505         if (    $seq_region_end < 1
00506              || $seq_region_start > $dest_slice_length
00507              || ( $dest_slice_seq_region_id ne $seq_region_id ) )
00508         {
00509           next FEATURE;
00510         }
00511       }
00512       $slice = $dest_slice;
00513     }
00514 
00515     # Finally, create the new DnaAlignFeature.
00516     push( @features,
00517           $self->_create_feature_fast(
00518              'Bio::EnsEMBL::DnaDnaAlignFeature', {
00519                'slice'          => $slice,
00520                'start'          => $seq_region_start,
00521                'end'            => $seq_region_end,
00522                'strand'         => $seq_region_strand,
00523                'hseqname'       => $hit_name,
00524                'hstart'         => $hit_start,
00525                'hend'           => $hit_end,
00526                'hstrand'        => $hit_strand,
00527                'score'          => $score,
00528                'p_value'        => $evalue,
00529                'percent_id'     => $perc_ident,
00530                'cigar_string'   => $cigar_line,
00531                'analysis'       => $analysis,
00532                'adaptor'        => $self,
00533                'dbID'           => $dna_align_feature_id,
00534                'external_db_id' => $external_db_id,
00535                'hcoverage'      => $hcoverage,
00536                'extra_data'     => (
00537                                    $extra_data
00538                                  ? $self->get_dumped_data($extra_data)
00539                                  : '' ),
00540                'dbname'                    => $external_db_name,
00541                'db_display_name'           => $external_display_db_name,
00542                'pair_dna_align_feature_id' => $pair_dna_align_feature_id
00543              } ) );
00544 
00545   } ## end while ( $sth->fetch() )
00546 
00547   return \@features;
00548 } ## end sub _objs_from_sth
00549 
00550 =head2 list_dbIDs
00551 
00552   Arg [1]    : none
00553   Example    : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()};
00554   Description: Gets an array of internal ids for all dna align features in 
00555                the current db
00556   Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
00557   Returntype : list of ints
00558   Exceptions : none
00559   Caller     : ?
00560   Status     : Stable
00561 
00562 =cut
00563 
00564 sub list_dbIDs {
00565    my ($self, $ordered) = @_;
00566 
00567    return $self->_list_dbIDs("dna_align_feature",undef, $ordered);
00568 }
00569 
00570 1;
00571 
00572