QueryManager/QueryManager.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : QueryManager
00025 // File Name    : QueryManager.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 #include "QueryManager.h"
00034 #include "SequenceReader.h"
00035 #include "SequenceEncoder.h"
00036 #include "HashTableGeneric.h"
00037 #include "HashTableTranslated.h"
00038 #include "MatchStoreUngapped.h"
00039 #include "MatchStoreGapped.h"
00040 
00041 
00042 // ### Function Definitions ###
00043 
00044 // MatchAdder member function definitions
00045 
00046 void MatchAdderImp::operator()(  SequenceNumber subjectNum,
00047                                      SequenceOffset numBases,
00048                                      SequenceOffset queryStart,
00049                                      SequenceOffset queryEnd,
00050                                      SequenceOffset subjectStart,
00051                                      SequenceOffset subjectEnd )
00052 {
00053   //  if (subjectNum!=lastSubjectNum_)
00054   //   subjectTable_.getSequenceName(name_,subjectNum);
00055   //  lastSubjectNum_=subjectNum;
00056   pStore_->addMatch( // name_,
00057                      subjectNum, 
00058                      numBases, 
00059                      queryStart, 
00060                      queryEnd, 
00061                      subjectStart, 
00062                      subjectEnd, 
00063                      isQueryForward_, 
00064                      true );
00065 
00066 } // ~void MatchAdderImp::operator()
00067 
00068 
00069 MatchAdderCodonCodon::MatchAdderCodonCodon( HashTableTranslated& subjectTable ) :
00070 MatchAdderImp( subjectTable ), subjectTable_( subjectTable ) {}
00071 
00072 MatchAdderProteinCodon::MatchAdderProteinCodon( HashTableTranslated& subjectTable ) :
00073 MatchAdderImp( subjectTable ), subjectTable_( subjectTable ) {}
00074 
00075 MatchAdderCodonProtein::MatchAdderCodonProtein( HashTableGeneric& subjectTable ) :
00076 MatchAdderImp( subjectTable ) {}
00077 
00078 
00079 void MatchAdderCodonProtein::operator()(  SequenceNumber subjectNum,
00080                                      SequenceOffset numBases,
00081                                      SequenceOffset queryStart,
00082                                      SequenceOffset queryEnd,
00083                                      SequenceOffset subjectStart,
00084                                      SequenceOffset subjectEnd )
00085 {
00086   //  if (subjectNum!=lastSubjectNum_)
00087   //    subjectTable_.getSequenceName(name_,subjectNum);
00088   //  lastSubjectNum_=subjectNum;
00089   pStore_->addMatch( // name_,
00090                      subjectNum, 
00091                      numBases, 
00092                      queryStart,
00093                      queryEnd, 
00094                      (subjectStart+2)/gNumReadingFrames, 
00095                      subjectEnd/gNumReadingFrames, 
00096                      isQueryForward_, true );
00097 } // ~void MatchAdderCodonProtein::operator()
00098 
00099 void MatchAdderCodonCodon::operator()(  SequenceNumber subjectNum,
00100                                      SequenceOffset numBases,
00101                                      SequenceOffset queryStart,
00102                                      SequenceOffset queryEnd,
00103                                      SequenceOffset subjectStart,
00104                                      SequenceOffset subjectEnd )
00105 {
00106   if (subjectNum!=lastSubjectNum_)
00107   {
00108     // subjectTable_.getSequenceName(name_,subjectNum);
00109     size_ = subjectTable_.getSequenceSize(subjectNum);
00110     lastSubjectNum_=subjectNum;
00111   } // ~if
00112   pStore_->addMatch( // name_,
00113                      subjectNum,
00114                      numBases, 
00115                      queryStart,
00116                      queryEnd,
00117                      //              subjectStart, subjectEnd,
00118                      subjectTable_.isForward() ? subjectStart
00119                      : size_ - subjectEnd + 1,
00120                      subjectTable_.isForward() ? subjectEnd
00121                      : size_ - subjectStart + 1, 
00122                       isQueryForward_, subjectTable_.isForward() );
00123   
00124 
00125 } // ~MatchAdderCodonCodon::operator()
00126 
00127 void MatchAdderProteinCodon::operator()(  SequenceNumber subjectNum,
00128                                      SequenceOffset numBases,
00129                                      SequenceOffset queryStart,
00130                                      SequenceOffset queryEnd,
00131                                      SequenceOffset subjectStart,
00132                                      SequenceOffset subjectEnd )
00133 {
00134 
00135 
00136   if (subjectNum!=lastSubjectNum_)
00137   {
00138     //    subjectTable_.getSequenceName(name_,subjectNum);
00139     size_ = subjectTable_.getSequenceSize(subjectNum);
00140     lastSubjectNum_=subjectNum;
00141   } // ~if
00142 
00143   //  cout << "MAPC: " << subjectNum << " " << queryStart << " " << queryEnd
00144   //   << " " << subjectStart << " " << subjectEnd << " - " << size_ << endl;
00145 
00146   pStore_->addMatch( // name_,
00147                      subjectNum, 
00148                      numBases/gNumReadingFrames, 
00149                      (queryStart+2)/gNumReadingFrames,  
00150                      queryEnd/gNumReadingFrames, 
00151                      //                              subjectStart,
00152                      //   subjectEnd,
00153                      subjectTable_.isForward() ? subjectStart
00154                      : size_ - subjectEnd + 1,
00155                      subjectTable_.isForward() ? subjectEnd
00156                      : size_ - subjectStart + 1, 
00157                      true,
00158                      subjectTable_.isForward() );
00159                      
00160 } // ~MatchAdderProteinCodon::operator()
00161 
00162 
00163 
00164 // MatchPolicy member function definitions
00165 
00166 MatchPolicy::MatchPolicy( HashTableGeneric& subjectTable ) :
00167   subjectTable_( subjectTable ), 
00168   queryWordLength_( subjectTable.getWordLength() ) 
00169 {}
00170 
00171 
00172 MatchPolicyDNADNA::MatchPolicyDNADNA( HashTableGeneric& subjectTable ) :
00173 MatchPolicy( subjectTable )
00174 {
00175  addMatch_ = new MatchAdderImp(subjectTable_); 
00176 }
00177 
00178 
00179 void MatchPolicyDNADNA::operator()
00180   ( WordSequence& querySeqFwd, MatchStore& store, 
00181     MatchAlgorithm& findMatch )
00182 {
00183 
00184   addMatch_->link(store);
00185   addMatch_->setQuerySize( ((querySeqFwd.size()-1) * queryWordLength_ )
00186       + querySeqFwd.getNumBasesInLast() );
00187 
00188   WordSequence querySeqRev;
00189   reverseComplement
00190     ( querySeqFwd, querySeqRev, 
00191       subjectTable_.getWordLength() ); 
00192 
00193   addMatch_->setQueryForward();
00194 
00195   findMatch( querySeqFwd, *addMatch_, subjectTable_ ); 
00196 
00197   addMatch_->setQueryReverse();
00198 
00199   findMatch( querySeqRev, *addMatch_, subjectTable_ );
00200 
00201 } // ~QueryManager::doMatch
00202 
00203 
00204 MatchPolicyProteinProtein::MatchPolicyProteinProtein
00205 ( HashTablePackedProtein& subjectTable ) :
00206 subjectTable_( subjectTable ),
00207 MatchPolicy( subjectTable )
00208 {
00209 
00210   subjectTable_.setQueryProtein();
00211   addMatch_ = new MatchAdderImp(subjectTable_);
00212 }
00213 
00214 void MatchPolicyProteinProtein::operator()
00215   ( WordSequence& querySeqFwd, MatchStore& store, 
00216     MatchAlgorithm& findMatch )
00217 {
00218 
00219   addMatch_->link(store);
00220   addMatch_->setQueryForward();
00221 
00222   findMatch( querySeqFwd, *addMatch_, subjectTable_ ); 
00223 
00224 } // ~QueryManager::doMatch
00225 
00226 
00227 MatchPolicyDNAProtein::MatchPolicyDNAProtein
00228 ( HashTablePackedProtein& subjectTable ) :
00229 MatchPolicy( subjectTable ),
00230 subjectTable_( subjectTable )
00231 {
00232   queryWordLength_ = gMaxBasesPerWord;
00233   subjectTable_.setQueryTranslatedDNA();
00234 
00235   addMatch_ = new MatchAdderCodonProtein(subjectTable_);
00236 
00237 } // ~MatchPolicyDNAProtein::MatchPolicyDNAProtein
00238 
00239 
00240 
00241 void MatchPolicyDNAProtein::operator()
00242   ( WordSequence& querySeqFwd, MatchStore& store, 
00243     MatchAlgorithm& findMatch )
00244 {
00245 
00246   addMatch_->link(store);
00247   addMatch_->setQueryForward();
00248 
00249   addMatch_->setQuerySize( ((querySeqFwd.size()-1) * gMaxBasesPerWord )
00250       + querySeqFwd.getNumBasesInLast() );
00251 
00252   WordSequence querySeqRev;
00253   reverseComplement
00254     ( querySeqFwd, querySeqRev, 
00255       queryWordLength_ ); 
00256 
00257   //  cout << "MPDP: doing fwd" << endl;
00258 
00259   addMatch_->setQueryForward();
00260 
00261   findMatch( querySeqFwd, *addMatch_, subjectTable_,
00262              gNumReadingFrames * subjectTable_.getWordLength() ); 
00263 
00264   //  cout << "MPDP: doing rev" << endl;
00265 
00266   addMatch_->setQueryReverse();
00267 
00268   findMatch( querySeqRev, *addMatch_, subjectTable_,
00269              gNumReadingFrames * subjectTable_.getWordLength() );
00270 
00271 
00272 } // ~void MatchPolicyDNAProtein::operator()
00273  
00274 
00275 // MatchPolicyProteinTranslated member functions
00276 
00277 MatchPolicyProteinTranslated::MatchPolicyProteinTranslated
00278 ( HashTableTranslated& subjectTable ) :
00279 MatchPolicy( subjectTable ),
00280 subjectTable_( subjectTable )
00281 {
00282   subjectTable_.setQueryProtein();
00283   addMatch_ = new MatchAdderProteinCodon(subjectTable_);
00284 } // ~MatchPolicyProteinTranslated::MatchPolicyProteinTranslated
00285 
00286 
00287 void MatchPolicyProteinTranslated::operator()
00288   ( WordSequence& querySeqFwd, MatchStore& store, 
00289     MatchAlgorithm& findMatch )
00290 {
00291 
00292   addMatch_->link(store);
00293   addMatch_->setQueryForward();
00294   addMatch_->setQuerySize( ((querySeqFwd.size()-1) * gMaxBasesPerWord )
00295       + querySeqFwd.getNumBasesInLast() );
00296 
00297   // Necessary to copy querySeqFwd - otherwise it is shifted by the
00298   // first findMatch, which thus screws up the second findMatch
00299   WordSequence querySeqCopy(querySeqFwd);
00300 
00301   //  addMatch_->setSubjectForward();
00302   //  cout << "MPPT: fwd" << endl;
00303   subjectTable_.setForward();
00304 
00305   findMatch( querySeqCopy, *addMatch_, subjectTable_, 
00306              gNumReadingFrames * subjectTable_.getWordLength() );
00307 
00308   //  addMatch_->setSubjectReverse();
00309   // cout << "MPPT: rev" << endl;
00310   subjectTable_.setReverse();
00311 
00312   findMatch( querySeqFwd, *addMatch_, subjectTable_,
00313              gNumReadingFrames * subjectTable_.getWordLength() );
00314 
00315 
00316 } // ~MatchPolicyProteinTranslated::operator()
00317 
00318 // MatchPolicyDNATranslated member function defs
00319 
00320 MatchPolicyDNATranslated::MatchPolicyDNATranslated
00321 ( HashTableTranslated& subjectTable ) :
00322 MatchPolicy( subjectTable ),
00323 subjectTable_( subjectTable )
00324 {
00325    queryWordLength_ = gMaxBasesPerWord;
00326   subjectTable_.setQueryTranslatedDNA();
00327      addMatch_ = new MatchAdderCodonCodon(subjectTable_);
00328      //  addMatch_ = new MatchAdderImp(subjectTable_);
00329 } // ~MatchPolicyDNATranslated::MatchPolicyDNATranslated
00330 
00331 
00332 
00333 
00334 void MatchPolicyDNATranslated::operator()
00335   ( WordSequence& querySeqFwd, MatchStore& store, 
00336     MatchAlgorithm& findMatch )
00337 {
00338 
00339   addMatch_->link(store);
00340 
00341   addMatch_->setQuerySize( ((querySeqFwd.size()-1) * gMaxBasesPerWord )
00342       + querySeqFwd.getNumBasesInLast() );
00343 
00344   WordSequence revSeq, translatedQuery;
00345 
00346   reverseComplement( querySeqFwd, revSeq, gMaxBasesPerWord );
00347 
00348   addMatch_->setQueryForward();
00349 
00350   //  addMatch_->setSubjectForward();
00351   subjectTable_.setForward();
00352 
00353   findMatch( querySeqFwd, *addMatch_, subjectTable_, 
00354              gNumReadingFrames * subjectTable_.getWordLength() );
00355 
00356   //  addMatch_->setSubjectReverse();
00357   subjectTable_.setReverse();
00358 
00359   findMatch( querySeqFwd, *addMatch_, subjectTable_, 
00360              gNumReadingFrames * subjectTable_.getWordLength() );
00361 
00362   addMatch_->setQueryReverse();
00363 
00364   //  addMatch_->setSubjectForward();
00365   subjectTable_.setForward();
00366 
00367   findMatch( revSeq, *addMatch_, subjectTable_, 
00368              gNumReadingFrames * subjectTable_.getWordLength() );
00369 
00370   //  addMatch_->setSubjectReverse();
00371   subjectTable_.setReverse();
00372 
00373   findMatch( revSeq, *addMatch_, subjectTable_, 
00374              gNumReadingFrames * subjectTable_.getWordLength() );
00375 
00376 
00377 
00378 } // ~MatchPolicyDNATranslated::operator()
00379 
00380 // QueryManager function definitions
00381 
00382 // Name:
00383 // Arguments:
00384 // TYPE  NAME  IN/OUT COMMENT
00385 // Returns: TYPE COMMENT
00386   QueryManager::QueryManager
00387   ( SequenceReader& querySeqs,  
00388     HashTableGeneric& subjectSeqs, ostream& monitoringStream ) :
00389    queryReader_( querySeqs ), 
00390    subjectTable_(  subjectSeqs ), 
00391    monitoringStream_( monitoringStream )
00392    {
00393      monitoringStream_ << "constructing QueryManager\n";
00394 
00395      HashTableTranslated* pTransTable
00396        (dynamic_cast<HashTableTranslated*>(&subjectSeqs));
00397 
00398      if ( pTransTable != NULL )
00399      {
00400        if (queryReader_.getBitsPerSymbol()==gResidueBits )
00401        {
00402          monitoringStream_ 
00403          << "Info: running protein query against translated DNA hash table.\n";
00404          policy_ = new MatchPolicyProteinTranslated(*pTransTable);
00405        }         
00406        else if (queryReader_.getBitsPerSymbol()==gBaseBits )
00407        {
00408          monitoringStream_ 
00409          << "Info: running DNA query against translated DNA hash table.\n";
00410          policy_ = new MatchPolicyDNATranslated(*pTransTable);
00411        }
00412        else assert (1==0);
00413      }
00414      else // not a translated table
00415      {
00416        HashTablePackedProtein* pProteinTable
00417          (dynamic_cast<HashTablePackedProtein*>(&subjectSeqs));
00418        if (pProteinTable!=NULL)
00419        {
00420 
00421          if (queryReader_.getBitsPerSymbol()==gResidueBits)
00422          {
00423            monitoringStream_ 
00424              << "Info: running protein query against protein hash table."
00425              << endl;
00426            policy_ = new MatchPolicyProteinProtein(*pProteinTable);
00427          } // ~if
00428          else if (queryReader_.getBitsPerSymbol()==gBaseBits)
00429          {
00430            monitoringStream_ 
00431              << "Info: running DNA query against protein hash table."
00432              << endl;
00433            policy_ = new MatchPolicyDNAProtein(*pProteinTable);
00434          } // ~else if
00435          else assert (1==0);  
00436        } // ~if  
00437        else if (subjectTable_.getBitsPerSymbol()==gBaseBits )
00438        {
00439          if (queryReader_.getBitsPerSymbol()==gBaseBits)
00440          {
00441            monitoringStream_ 
00442            << "Info: running DNA query against DNA hash table."
00443            << endl;
00444            policy_ = new MatchPolicyDNADNA(subjectSeqs);
00445          } // ~if
00446          else
00447          {
00448            monitoringStream_ 
00449            << "Error: can't run protein query against DNA hash table!\n";
00450            throw SSAHAException
00451              ("Can't run protein query against DNA hash table!");
00452          } // ~else
00453          
00454        } // ~else if
00455 
00456      } // ~else
00457 
00458    } // ~constructor
00459 
00460    QueryManager::~QueryManager()
00461    {
00462      monitoringStream_ << "destructing QueryManager\n";
00463      delete policy_;
00464    } // ~constructor
00465 
00466   void QueryManager::doQuery
00467   //  ( MatchStore& store, 
00468   ( MatchAlgorithm& match, 
00469     MatchTask& task,
00470     int queryStart, 
00471     int queryEnd )
00472   {
00473     int numBasesInLast(0);
00474     int wordLength( (*policy_).getWordLength() );
00475 
00476     WordSequence querySeqFwd;
00477 
00478     queryReader_.rewind();
00479 
00480     if ( queryStart == 1 ) 
00481     {
00482       numBasesInLast = queryReader_.getNextSequence
00483                          ( querySeqFwd, wordLength );
00484     } // ~if 
00485     else if ( queryStart > 1 ) 
00486     {
00487       numBasesInLast = queryReader_.getSequence
00488       ( querySeqFwd, queryStart, wordLength );
00489     } // ~else if
00490     else throw SSAHAException("Invalid query start value!");
00491 
00492     if ( numBasesInLast == -1 )
00493     {
00494       monitoringStream_ << "Info: requested sequence start (" << queryStart
00495       << ") exceeds number of sequences in query database.\n";
00496       return;
00497     } // ~if
00498 
00499     do
00500     {
00501 
00502       string queryName;
00503       queryReader_.getLastSequenceName( queryName );
00504       int queryNum = queryReader_.getLastSequenceNumber();
00505       int queryBases = ( querySeqFwd.size() - 1 )*wordLength 
00506         + querySeqFwd.getNumBasesInLast();
00507 
00508       MatchStoreImp store
00509       ( queryName, 
00510         queryNum, 
00511         queryBases,  
00512         subjectTable_.getNameReader() );
00513 
00514       //      cout << "QM: doing " << queryNum << endl;
00515       (*policy_)( querySeqFwd, store, match );
00516 
00517       store.setup();
00518 
00519       task( store );
00520 
00521       if ( queryReader_.getLastSequenceNumber() == queryEnd ) break;
00522 
00523       // clear the query sequence ready to read in next query
00524       querySeqFwd.clear();
00525 
00526 
00527       // read in next query
00528       numBasesInLast = queryReader_.getNextSequence
00529                          ( querySeqFwd, wordLength );
00530     } // ~while
00531     while ( numBasesInLast != -1 );
00532 
00533     if (    ( queryReader_.getLastSequenceNumber() < queryEnd  )
00534          && ( queryEnd != - 1 ) )
00535     {
00536       monitoringStream_ << "Info: requested final sequence (" << queryEnd
00537       << ") exceeded number of\nsequences in query database ("
00538       << queryReader_.getLastSequenceNumber() << ").\n";
00539     } // ~if
00540     
00541     return;
00542 
00543   } // ~QueryManager::doQuery
00544 
00545 
00546 
00547 
00548 // End of file QueryManager.cpp
00549 
00550 
00551 
00552 
00553 
00554 
00555 
00556 
00557 
00558 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2