Archive Ensembl HomeArchive Ensembl Home
SubsetAdaptor.pm
Go to the documentation of this file.
00001 package Bio::EnsEMBL::Compara::DBSQL::SubsetAdaptor;
00002 
00003 use strict;
00004 use Bio::EnsEMBL::Compara::Member;
00005 use Bio::EnsEMBL::Compara::Attribute;
00006 use Bio::EnsEMBL::Compara::Subset;
00007 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
00008 use Bio::EnsEMBL::Utils::Exception;
00009 
00010 our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
00011 
00012 =head2 fetch_by_dbID
00013 
00014   Arg [1]    : int $id
00015                the unique database identifier for the feature to be obtained
00016   Example    : $feat = $adaptor->fetch_by_dbID(1234);
00017   Description: Returns the feature created from the database defined by the
00018                the id $id.
00019   Returntype : Bio::EnsEMBL::Compara::Subset
00020   Exceptions : thrown if $id is not defined
00021   Caller     : general
00022 
00023 =cut
00024 
00025 sub fetch_by_dbID{
00026   my ($self,$id) = @_;
00027 
00028   unless(defined $id) {
00029     $self->throw("fetch_by_dbID must have an id");
00030   }
00031 
00032   my @tabs = $self->_tables;
00033 
00034   my ($name, $syn) = @{$tabs[0]};
00035 
00036   #construct a constraint like 't1.table1_id = 1'
00037   my $constraint = "${syn}.${name}_id = $id";
00038 
00039   #return first element of _generic_fetch list
00040   my ($obj) = @{$self->_generic_fetch($constraint)};
00041   return $obj;
00042 }
00043 
00044 =head2 fetch_by_set_description
00045 
00046   Arg [1]    : string $set_description
00047   Example    : 
00048   Description: 
00049   Returntype : 
00050   Exceptions : 
00051   Caller     : 
00052 
00053 =cut
00054 
00055 sub fetch_by_set_description {
00056   my ($self,$set_description) = @_;
00057 
00058   unless(defined $set_description) {
00059     $self->throw("fetch_by_set_name must have a set_description");
00060   }
00061 
00062   #construct a constraint like 't1.table1_id = 1'
00063   my $constraint = "s.description = '$set_description'";
00064   print("fetch_by_set_name contraint:\n$constraint\n");
00065 
00066   #return first element of _generic_fetch list
00067   my ($obj) = @{$self->_generic_fetch($constraint)};
00068   return $obj;
00069 }
00070 
00071 
00072 =head2 fetch_by_description_pattern
00073 
00074   Arg [1]    : string $description_pattern
00075   Example    : 
00076   Description: 
00077   Returntype : 
00078   Exceptions : 
00079   Caller     : 
00080 
00081 =cut
00082 
00083 sub fetch_by_description_pattern {
00084   my ($self,$description_pattern) = @_;
00085 
00086   unless(defined $description_pattern) {
00087     $self->throw("fetch_by_description_pattern must have a description_pattern");
00088   }
00089 
00090   #construct a constraint like 't1.table1_id = 1'
00091   my $constraint = "s.description LIKE '$description_pattern'";
00092   # print("fetch_by_description_pattern contraint:\n$constraint\n");
00093 
00094   #return first element of _generic_fetch list
00095   my ($obj) = @{$self->_generic_fetch($constraint)};
00096   return $obj;
00097 }
00098 
00099 
00100 =head2 fetch_all
00101 
00102   Arg        : None
00103   Example    : 
00104   Description: 
00105   Returntype : 
00106   Exceptions : 
00107   Caller     : 
00108 
00109 =cut
00110 
00111 sub fetch_all {
00112   my $self = shift;
00113 
00114   return $self->_generic_fetch();
00115 }
00116 
00117 
00118 
00119 #
00120 # INTERNAL METHODS
00121 #
00122 ###################
00123 
00124 =head2 _generic_fetch
00125 
00126   Arg [1]    : (optional) string $constraint
00127                An SQL query constraint (i.e. part of the WHERE clause)
00128   Arg [2]    : (optional) string $logic_name
00129                the logic_name of the analysis of the features to obtain
00130   Example    : $fts = $a->_generic_fetch('contig_id in (1234, 1235)', 'Swall');
00131   Description: Performs a database fetch and returns feature objects in
00132                contig coordinates.
00133   Returntype : listref of Bio::EnsEMBL::SeqFeature in contig coordinates
00134   Exceptions : none
00135   Caller     : BaseFeatureAdaptor, ProxyDnaAlignFeatureAdaptor::_generic_fetch
00136 
00137 =cut
00138   
00139 sub _generic_fetch {
00140   my ($self, $constraint, $join) = @_;
00141   
00142   my @tables = $self->_tables;
00143   my $columns = join(', ', $self->_columns());
00144   
00145   if ($join) {
00146     foreach my $single_join (@{$join}) {
00147       my ($tablename, $condition, $extra_columns) = @{$single_join};
00148       if ($tablename && $condition) {
00149         push @tables, $tablename;
00150         
00151         if($constraint) {
00152           $constraint .= " AND $condition";
00153         } else {
00154           $constraint = " $condition";
00155         }
00156       } 
00157       if ($extra_columns) {
00158         $columns .= ", " . join(', ', @{$extra_columns});
00159       }
00160     }
00161   }
00162       
00163   #construct a nice table string like 'table1 t1, table2 t2'
00164   my $tablenames = join(', ', map({ join(' ', @$_) } @tables));
00165 
00166   my $sql = "SELECT $columns FROM $tablenames";
00167 
00168   my $default_where = $self->_default_where_clause;
00169   my $final_clause = $self->_final_clause;
00170 
00171   #append a where clause if it was defined
00172   if($constraint) { 
00173     $sql .= " WHERE $constraint ";
00174     if($default_where) {
00175       $sql .= " AND $default_where ";
00176     }
00177   } elsif($default_where) {
00178     $sql .= " WHERE $default_where ";
00179   }
00180 
00181   #append additional clauses which may have been defined
00182   $sql .= " $final_clause";
00183 
00184   my $sth = $self->prepare($sql);
00185   $sth->execute;  
00186 
00187   #print STDERR $sql,"\n";
00188 
00189   return $self->_objs_from_sth($sth);
00190 }
00191 
00192 sub _tables {
00193   my $self = shift;
00194 
00195   return (['subset', 's'], ['subset_member', 'sm']);
00196 }
00197 
00198 sub _columns {
00199   my $self = shift;
00200 
00201   return qw (s.subset_id
00202              s.description
00203              sm.subset_id
00204              sm.member_id);
00205 }
00206 
00207 sub _objs_from_sth {
00208   my ($self, $sth) = @_;
00209   
00210   my %column;
00211   $sth->bind_columns( \( @column{ @{$sth->{NAME_lc} } } ));
00212 
00213   my @sets = ();
00214   my %setNames;
00215   my %setMemberIds;
00216 
00217   while ($sth->fetch()) {
00218     my ($subset_id, $name, $member_id);
00219     $subset_id = $column{'subset_id'};
00220     $name = $column{'description'};
00221     $member_id = $column{'member_id'};
00222 
00223     if(defined($setMemberIds{$subset_id})) {
00224       $setMemberIds{$subset_id}->{$member_id} = $member_id;
00225     }
00226     else {
00227       $setNames{$subset_id} = $name;
00228       $setMemberIds{$subset_id} = {};
00229       $setMemberIds{$subset_id}->{$member_id} = $member_id;
00230     }
00231   }
00232   $sth->finish;
00233 
00234   my @allSubsetIds = keys(%setNames);
00235 
00236   foreach my $subset_id (@allSubsetIds) {
00237     my ($subset, @member_id_list, $member_id);
00238 
00239     @member_id_list = keys(%{$setMemberIds{$subset_id}});
00240     my $count = $#member_id_list + 1;
00241     # print("subset id = $subset_id has $count unique member_ids\n");
00242     
00243     $subset = Bio::EnsEMBL::Compara::Subset->new(-dbid => $subset_id,
00244                                                  -name => $setNames{$subset_id},
00245                                                  -adaptor => $self);
00246     # print("loading set '" . $setNames{$subset_id} . "' id=$subset_id\n");
00247 
00248     @{$subset->{'_member_id_list'}} = @member_id_list;
00249 
00250     push @sets, $subset;
00251   }
00252 
00253   return \@sets
00254 }
00255 
00256 sub _default_where_clause {
00257   my $self = shift;
00258 
00259   return 's.subset_id = sm.subset_id';
00260 }
00261 
00262 sub _final_clause {
00263   my $self = shift;
00264 
00265   return '';
00266 }
00267 
00268 
00269 #
00270 # STORE METHODS
00271 #
00272 ################
00273 
00274 =head2 store
00275 
00276   Arg [1]    :
00277   Example    :
00278   Description:
00279   Returntype :
00280   Exceptions :
00281   Caller     :
00282 
00283 =cut
00284 
00285 sub store {
00286   my ($self,$subset) = @_;
00287 
00288   unless($subset->isa('Bio::EnsEMBL::Compara::Subset')) {
00289     $self->throw(
00290       "set arg must be a [Bio::EnsEMBL::Compara::Subset] "
00291     . "not a $subset");
00292   }
00293 
00294   my $sth = $self->prepare("INSERT ignore INTO subset (description) VALUES (?)");
00295   if($sth->execute($subset->description) >0) {
00296     $subset->dbID( $sth->{'mysql_insertid'} );
00297   } else {
00298     #print("insert failed, do select\n");
00299     my $sth2 = $self->prepare("SELECT subset_id FROM subset WHERE description=?");
00300     $sth2->execute($subset->description);
00301     my($id) = $sth2->fetchrow_array();
00302     $subset->dbID($id);
00303     $sth2->finish;
00304   }
00305   $sth->finish;
00306   #print("SubsetAdaptor:store() dbID = ", $subset->dbID, "\n");
00307 
00308   my @memberIds = @{$subset->member_id_list()};
00309   $sth = $self->prepare("INSERT ignore INTO subset_member (subset_id, member_id) VALUES (?,?)");
00310   foreach my $member_id (@memberIds) {
00311     $sth->execute($subset->dbID, $member_id) if($member_id);
00312   }
00313   $sth->finish;
00314 
00315   $subset->adaptor($self);
00316 
00317   return $subset->dbID;
00318 }
00319 
00320 
00321 =head2 store_link
00322 
00323   Arg [1]    :  Bio::EnsEMBL::Compara::MemberSet $subset
00324   Arg [2]    :  int $member_id
00325   Example    :
00326   Description:
00327   Returntype :
00328   Exceptions :
00329   Caller     :
00330 
00331 =cut
00332 
00333 sub store_link {
00334   my ($self, $subset, $member_id) = @_;
00335 
00336   unless($subset->isa('Bio::EnsEMBL::Compara::Subset')) {
00337     $self->throw(
00338       "set arg must be a [Bio::EnsEMBL::Compara::Subset] "
00339     . "not a $subset");
00340   }
00341   return unless($member_id);
00342 
00343   my $sth = $self->prepare("INSERT ignore INTO subset_member (subset_id, member_id) VALUES (?,?)");
00344   $sth->execute($subset->dbID, $member_id);
00345   $sth->finish;
00346 }
00347 
00348 
00349 =head2 delete_link
00350 
00351   Arg [1]    :  Bio::EnsEMBL::Compara::MemberSet $subset
00352   Arg [2]    :  int $member_id
00353   Example    :
00354   Description:
00355   Returntype :
00356   Exceptions :
00357   Caller     :
00358 
00359 =cut
00360 
00361 sub delete_link {
00362   my ($self, $subset, $member_id) = @_;
00363 
00364   unless($subset->isa('Bio::EnsEMBL::Compara::Subset')) {
00365     $self->throw(
00366       "set arg must be a [Bio::EnsEMBL::Compara::Subset] "
00367     . "not a $subset");
00368   }
00369 
00370   my $sth =
00371     $self->prepare("DELETE FROM subset_member WHERE subset_id=? AND member_id=?");
00372   $sth->execute($subset->dbID, $member_id);
00373   $sth->finish;
00374 }
00375 
00376 
00377 sub dumpFastaForSubset {
00378   my($self, $subset, $fastafile) = @_;
00379 
00380   unless($subset && $subset->isa('Bio::EnsEMBL::Compara::Subset')) {
00381     throw(
00382       "set arg must be a [Bio::EnsEMBL::Compara::Subset] "
00383     . "not a $subset");
00384   }
00385   unless($subset->dbID) {
00386     throw("subset must be in database and dbID defined");
00387   }
00388   
00389   my $sql = "SELECT member.source_name, member.stable_id, member.genome_db_id," .
00390             " member.member_id, member.description, sequence.sequence " .
00391             " FROM member, sequence, subset_member " .
00392             " WHERE subset_member.subset_id = " . $subset->dbID .
00393             " AND member.member_id=subset_member.member_id ".
00394             " AND member.sequence_id=sequence.sequence_id " ;
00395            # " ORDER BY member.stable_id;";
00396 
00397   open FASTAFILE, ">$fastafile"
00398     or die "Could not open $fastafile for output\n";
00399   print("writing fasta to loc '$fastafile'\n");
00400 
00401   my $sth = $self->prepare( $sql );
00402   $sth->execute();
00403 
00404   my ($source_name, $stable_id, $genome_db_id, $member_id, $description, $sequence);
00405   $sth->bind_columns( undef, \$source_name, \$stable_id, \$genome_db_id,
00406       \$member_id, \$description, \$sequence );
00407 
00408   while( $sth->fetch() ) {
00409     $sequence =~ s/(.{72})/$1\n/g;
00410     $genome_db_id ||= 0;
00411     print FASTAFILE ">$source_name:$stable_id IDs:$genome_db_id:$member_id $description\n$sequence\n";
00412   }
00413   close(FASTAFILE);
00414 
00415   $sth->finish();
00416 
00417   #
00418   # update this subset_id's  subset.dump_loc with the full path of this dumped fasta file
00419   #
00420 
00421   $sth = $self->prepare("UPDATE subset SET dump_loc = ? WHERE subset_id = ?");
00422   $sth->execute($fastafile, $subset->dbID);
00423   $sth->finish;
00424   $subset->dump_loc($fastafile);
00425 }
00426 
00427 1;
00428 
00429 
00430 
00431 
00432