00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "QueryManager.h"
00034 #include "SequenceReader.h"
00035 #include "SequenceEncoder.h"
00036 #include "HashTableGeneric.h"
00037 #include "HashTableTranslated.h"
00038 #include "MatchStoreUngapped.h"
00039 #include "MatchStoreGapped.h"
00040
00041
00042
00043
00044
00045
00046 void MatchAdderImp::operator()( SequenceNumber subjectNum,
00047 SequenceOffset numBases,
00048 SequenceOffset queryStart,
00049 SequenceOffset queryEnd,
00050 SequenceOffset subjectStart,
00051 SequenceOffset subjectEnd )
00052 {
00053
00054
00055
00056 pStore_->addMatch(
00057 subjectNum,
00058 numBases,
00059 queryStart,
00060 queryEnd,
00061 subjectStart,
00062 subjectEnd,
00063 isQueryForward_,
00064 true );
00065
00066 }
00067
00068
00069 MatchAdderCodonCodon::MatchAdderCodonCodon( HashTableTranslated& subjectTable ) :
00070 MatchAdderImp( subjectTable ), subjectTable_( subjectTable ) {}
00071
00072 MatchAdderProteinCodon::MatchAdderProteinCodon( HashTableTranslated& subjectTable ) :
00073 MatchAdderImp( subjectTable ), subjectTable_( subjectTable ) {}
00074
00075 MatchAdderCodonProtein::MatchAdderCodonProtein( HashTableGeneric& subjectTable ) :
00076 MatchAdderImp( subjectTable ) {}
00077
00078
00079 void MatchAdderCodonProtein::operator()( SequenceNumber subjectNum,
00080 SequenceOffset numBases,
00081 SequenceOffset queryStart,
00082 SequenceOffset queryEnd,
00083 SequenceOffset subjectStart,
00084 SequenceOffset subjectEnd )
00085 {
00086
00087
00088
00089 pStore_->addMatch(
00090 subjectNum,
00091 numBases,
00092 queryStart,
00093 queryEnd,
00094 (subjectStart+2)/gNumReadingFrames,
00095 subjectEnd/gNumReadingFrames,
00096 isQueryForward_, true );
00097 }
00098
00099 void MatchAdderCodonCodon::operator()( SequenceNumber subjectNum,
00100 SequenceOffset numBases,
00101 SequenceOffset queryStart,
00102 SequenceOffset queryEnd,
00103 SequenceOffset subjectStart,
00104 SequenceOffset subjectEnd )
00105 {
00106 if (subjectNum!=lastSubjectNum_)
00107 {
00108
00109 size_ = subjectTable_.getSequenceSize(subjectNum);
00110 lastSubjectNum_=subjectNum;
00111 }
00112 pStore_->addMatch(
00113 subjectNum,
00114 numBases,
00115 queryStart,
00116 queryEnd,
00117
00118 subjectTable_.isForward() ? subjectStart
00119 : size_ - subjectEnd + 1,
00120 subjectTable_.isForward() ? subjectEnd
00121 : size_ - subjectStart + 1,
00122 isQueryForward_, subjectTable_.isForward() );
00123
00124
00125 }
00126
00127 void MatchAdderProteinCodon::operator()( SequenceNumber subjectNum,
00128 SequenceOffset numBases,
00129 SequenceOffset queryStart,
00130 SequenceOffset queryEnd,
00131 SequenceOffset subjectStart,
00132 SequenceOffset subjectEnd )
00133 {
00134
00135
00136 if (subjectNum!=lastSubjectNum_)
00137 {
00138
00139 size_ = subjectTable_.getSequenceSize(subjectNum);
00140 lastSubjectNum_=subjectNum;
00141 }
00142
00143
00144
00145
00146 pStore_->addMatch(
00147 subjectNum,
00148 numBases/gNumReadingFrames,
00149 (queryStart+2)/gNumReadingFrames,
00150 queryEnd/gNumReadingFrames,
00151
00152
00153 subjectTable_.isForward() ? subjectStart
00154 : size_ - subjectEnd + 1,
00155 subjectTable_.isForward() ? subjectEnd
00156 : size_ - subjectStart + 1,
00157 true,
00158 subjectTable_.isForward() );
00159
00160 }
00161
00162
00163
00164
00165
00166 MatchPolicy::MatchPolicy( HashTableGeneric& subjectTable ) :
00167 subjectTable_( subjectTable ),
00168 queryWordLength_( subjectTable.getWordLength() )
00169 {}
00170
00171
00172 MatchPolicyDNADNA::MatchPolicyDNADNA( HashTableGeneric& subjectTable ) :
00173 MatchPolicy( subjectTable )
00174 {
00175 addMatch_ = new MatchAdderImp(subjectTable_);
00176 }
00177
00178
00179 void MatchPolicyDNADNA::operator()
00180 ( WordSequence& querySeqFwd, MatchStore& store,
00181 MatchAlgorithm& findMatch )
00182 {
00183
00184 addMatch_->link(store);
00185 addMatch_->setQuerySize( ((querySeqFwd.size()-1) * queryWordLength_ )
00186 + querySeqFwd.getNumBasesInLast() );
00187
00188 WordSequence querySeqRev;
00189 reverseComplement
00190 ( querySeqFwd, querySeqRev,
00191 subjectTable_.getWordLength() );
00192
00193 addMatch_->setQueryForward();
00194
00195 findMatch( querySeqFwd, *addMatch_, subjectTable_ );
00196
00197 addMatch_->setQueryReverse();
00198
00199 findMatch( querySeqRev, *addMatch_, subjectTable_ );
00200
00201 }
00202
00203
00204 MatchPolicyProteinProtein::MatchPolicyProteinProtein
00205 ( HashTablePackedProtein& subjectTable ) :
00206 subjectTable_( subjectTable ),
00207 MatchPolicy( subjectTable )
00208 {
00209
00210 subjectTable_.setQueryProtein();
00211 addMatch_ = new MatchAdderImp(subjectTable_);
00212 }
00213
00214 void MatchPolicyProteinProtein::operator()
00215 ( WordSequence& querySeqFwd, MatchStore& store,
00216 MatchAlgorithm& findMatch )
00217 {
00218
00219 addMatch_->link(store);
00220 addMatch_->setQueryForward();
00221
00222 findMatch( querySeqFwd, *addMatch_, subjectTable_ );
00223
00224 }
00225
00226
00227 MatchPolicyDNAProtein::MatchPolicyDNAProtein
00228 ( HashTablePackedProtein& subjectTable ) :
00229 MatchPolicy( subjectTable ),
00230 subjectTable_( subjectTable )
00231 {
00232 queryWordLength_ = gMaxBasesPerWord;
00233 subjectTable_.setQueryTranslatedDNA();
00234
00235 addMatch_ = new MatchAdderCodonProtein(subjectTable_);
00236
00237 }
00238
00239
00240
00241 void MatchPolicyDNAProtein::operator()
00242 ( WordSequence& querySeqFwd, MatchStore& store,
00243 MatchAlgorithm& findMatch )
00244 {
00245
00246 addMatch_->link(store);
00247 addMatch_->setQueryForward();
00248
00249 addMatch_->setQuerySize( ((querySeqFwd.size()-1) * gMaxBasesPerWord )
00250 + querySeqFwd.getNumBasesInLast() );
00251
00252 WordSequence querySeqRev;
00253 reverseComplement
00254 ( querySeqFwd, querySeqRev,
00255 queryWordLength_ );
00256
00257
00258
00259 addMatch_->setQueryForward();
00260
00261 findMatch( querySeqFwd, *addMatch_, subjectTable_,
00262 gNumReadingFrames * subjectTable_.getWordLength() );
00263
00264
00265
00266 addMatch_->setQueryReverse();
00267
00268 findMatch( querySeqRev, *addMatch_, subjectTable_,
00269 gNumReadingFrames * subjectTable_.getWordLength() );
00270
00271
00272 }
00273
00274
00275
00276
00277 MatchPolicyProteinTranslated::MatchPolicyProteinTranslated
00278 ( HashTableTranslated& subjectTable ) :
00279 MatchPolicy( subjectTable ),
00280 subjectTable_( subjectTable )
00281 {
00282 subjectTable_.setQueryProtein();
00283 addMatch_ = new MatchAdderProteinCodon(subjectTable_);
00284 }
00285
00286
00287 void MatchPolicyProteinTranslated::operator()
00288 ( WordSequence& querySeqFwd, MatchStore& store,
00289 MatchAlgorithm& findMatch )
00290 {
00291
00292 addMatch_->link(store);
00293 addMatch_->setQueryForward();
00294 addMatch_->setQuerySize( ((querySeqFwd.size()-1) * gMaxBasesPerWord )
00295 + querySeqFwd.getNumBasesInLast() );
00296
00297
00298
00299 WordSequence querySeqCopy(querySeqFwd);
00300
00301
00302
00303 subjectTable_.setForward();
00304
00305 findMatch( querySeqCopy, *addMatch_, subjectTable_,
00306 gNumReadingFrames * subjectTable_.getWordLength() );
00307
00308
00309
00310 subjectTable_.setReverse();
00311
00312 findMatch( querySeqFwd, *addMatch_, subjectTable_,
00313 gNumReadingFrames * subjectTable_.getWordLength() );
00314
00315
00316 }
00317
00318
00319
00320 MatchPolicyDNATranslated::MatchPolicyDNATranslated
00321 ( HashTableTranslated& subjectTable ) :
00322 MatchPolicy( subjectTable ),
00323 subjectTable_( subjectTable )
00324 {
00325 queryWordLength_ = gMaxBasesPerWord;
00326 subjectTable_.setQueryTranslatedDNA();
00327 addMatch_ = new MatchAdderCodonCodon(subjectTable_);
00328
00329 }
00330
00331
00332
00333
00334 void MatchPolicyDNATranslated::operator()
00335 ( WordSequence& querySeqFwd, MatchStore& store,
00336 MatchAlgorithm& findMatch )
00337 {
00338
00339 addMatch_->link(store);
00340
00341 addMatch_->setQuerySize( ((querySeqFwd.size()-1) * gMaxBasesPerWord )
00342 + querySeqFwd.getNumBasesInLast() );
00343
00344 WordSequence revSeq, translatedQuery;
00345
00346 reverseComplement( querySeqFwd, revSeq, gMaxBasesPerWord );
00347
00348 addMatch_->setQueryForward();
00349
00350
00351 subjectTable_.setForward();
00352
00353 findMatch( querySeqFwd, *addMatch_, subjectTable_,
00354 gNumReadingFrames * subjectTable_.getWordLength() );
00355
00356
00357 subjectTable_.setReverse();
00358
00359 findMatch( querySeqFwd, *addMatch_, subjectTable_,
00360 gNumReadingFrames * subjectTable_.getWordLength() );
00361
00362 addMatch_->setQueryReverse();
00363
00364
00365 subjectTable_.setForward();
00366
00367 findMatch( revSeq, *addMatch_, subjectTable_,
00368 gNumReadingFrames * subjectTable_.getWordLength() );
00369
00370
00371 subjectTable_.setReverse();
00372
00373 findMatch( revSeq, *addMatch_, subjectTable_,
00374 gNumReadingFrames * subjectTable_.getWordLength() );
00375
00376
00377
00378 }
00379
00380
00381
00382
00383
00384
00385
00386 QueryManager::QueryManager
00387 ( SequenceReader& querySeqs,
00388 HashTableGeneric& subjectSeqs, ostream& monitoringStream ) :
00389 queryReader_( querySeqs ),
00390 subjectTable_( subjectSeqs ),
00391 monitoringStream_( monitoringStream )
00392 {
00393 monitoringStream_ << "constructing QueryManager\n";
00394
00395 HashTableTranslated* pTransTable
00396 (dynamic_cast<HashTableTranslated*>(&subjectSeqs));
00397
00398 if ( pTransTable != NULL )
00399 {
00400 if (queryReader_.getBitsPerSymbol()==gResidueBits )
00401 {
00402 monitoringStream_
00403 << "Info: running protein query against translated DNA hash table.\n";
00404 policy_ = new MatchPolicyProteinTranslated(*pTransTable);
00405 }
00406 else if (queryReader_.getBitsPerSymbol()==gBaseBits )
00407 {
00408 monitoringStream_
00409 << "Info: running DNA query against translated DNA hash table.\n";
00410 policy_ = new MatchPolicyDNATranslated(*pTransTable);
00411 }
00412 else assert (1==0);
00413 }
00414 else
00415 {
00416 HashTablePackedProtein* pProteinTable
00417 (dynamic_cast<HashTablePackedProtein*>(&subjectSeqs));
00418 if (pProteinTable!=NULL)
00419 {
00420
00421 if (queryReader_.getBitsPerSymbol()==gResidueBits)
00422 {
00423 monitoringStream_
00424 << "Info: running protein query against protein hash table."
00425 << endl;
00426 policy_ = new MatchPolicyProteinProtein(*pProteinTable);
00427 }
00428 else if (queryReader_.getBitsPerSymbol()==gBaseBits)
00429 {
00430 monitoringStream_
00431 << "Info: running DNA query against protein hash table."
00432 << endl;
00433 policy_ = new MatchPolicyDNAProtein(*pProteinTable);
00434 }
00435 else assert (1==0);
00436 }
00437 else if (subjectTable_.getBitsPerSymbol()==gBaseBits )
00438 {
00439 if (queryReader_.getBitsPerSymbol()==gBaseBits)
00440 {
00441 monitoringStream_
00442 << "Info: running DNA query against DNA hash table."
00443 << endl;
00444 policy_ = new MatchPolicyDNADNA(subjectSeqs);
00445 }
00446 else
00447 {
00448 monitoringStream_
00449 << "Error: can't run protein query against DNA hash table!\n";
00450 throw SSAHAException
00451 ("Can't run protein query against DNA hash table!");
00452 }
00453
00454 }
00455
00456 }
00457
00458 }
00459
00460 QueryManager::~QueryManager()
00461 {
00462 monitoringStream_ << "destructing QueryManager\n";
00463 delete policy_;
00464 }
00465
00466 void QueryManager::doQuery
00467
00468 ( MatchAlgorithm& match,
00469 MatchTask& task,
00470 int queryStart,
00471 int queryEnd )
00472 {
00473 int numBasesInLast(0);
00474 int wordLength( (*policy_).getWordLength() );
00475
00476 WordSequence querySeqFwd;
00477
00478 queryReader_.rewind();
00479
00480 if ( queryStart == 1 )
00481 {
00482 numBasesInLast = queryReader_.getNextSequence
00483 ( querySeqFwd, wordLength );
00484 }
00485 else if ( queryStart > 1 )
00486 {
00487 numBasesInLast = queryReader_.getSequence
00488 ( querySeqFwd, queryStart, wordLength );
00489 }
00490 else throw SSAHAException("Invalid query start value!");
00491
00492 if ( numBasesInLast == -1 )
00493 {
00494 monitoringStream_ << "Info: requested sequence start (" << queryStart
00495 << ") exceeds number of sequences in query database.\n";
00496 return;
00497 }
00498
00499 do
00500 {
00501
00502 string queryName;
00503 queryReader_.getLastSequenceName( queryName );
00504 int queryNum = queryReader_.getLastSequenceNumber();
00505 int queryBases = ( querySeqFwd.size() - 1 )*wordLength
00506 + querySeqFwd.getNumBasesInLast();
00507
00508 MatchStoreImp store
00509 ( queryName,
00510 queryNum,
00511 queryBases,
00512 subjectTable_.getNameReader() );
00513
00514
00515 (*policy_)( querySeqFwd, store, match );
00516
00517 store.setup();
00518
00519 task( store );
00520
00521 if ( queryReader_.getLastSequenceNumber() == queryEnd ) break;
00522
00523
00524 querySeqFwd.clear();
00525
00526
00527
00528 numBasesInLast = queryReader_.getNextSequence
00529 ( querySeqFwd, wordLength );
00530 }
00531 while ( numBasesInLast != -1 );
00532
00533 if ( ( queryReader_.getLastSequenceNumber() < queryEnd )
00534 && ( queryEnd != - 1 ) )
00535 {
00536 monitoringStream_ << "Info: requested final sequence (" << queryEnd
00537 << ") exceeded number of\nsequences in query database ("
00538 << queryReader_.getLastSequenceNumber() << ").\n";
00539 }
00540
00541 return;
00542
00543 }
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558