00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "GlobalDefinitions.h"
00034 #include "HashTable.h"
00035 #include "HashTablePacked.h"
00036 #include "HashTableTranslated.h"
00037 #include "SequenceReaderString.h"
00038 #include "SequenceReaderFasta.h"
00039 #include "SequenceEncoder.h"
00040 #include "GenerateTestFastaFiles.h"
00041 #include "MatchStoreUngapped.h"
00042 #include "MatchStore.h"
00043 #include "QueryManager.h"
00044 #include <assert.h>
00045 #include <string>
00046
00047
00048
00049
00050 class HashTableTranslatedPeek : public HashTableTranslated
00051 {
00052 public:
00053 HashTableTranslatedPeek( void ): HashTableTranslated(cout) {}
00054 HashTableComponent& peekFwd( void ) { return hashFwd_; }
00055 HashTableComponent& peekRev( void ) { return hashRev_; }
00056
00057 };
00058
00059
00060
00061
00062 int main( void )
00063 {
00064
00065 try {
00066
00067 int numTests = 0;
00068
00069 cout << "*******************************************" << endl << endl;
00070 cout << " Test of class HashTable" << endl << endl;
00071 cout << "*******************************************" << endl << endl;
00072
00073 int numSeqs = 10;
00074 int seqSize = 100;
00075 int wordLength = 10;
00076 int maxHits = 50;
00077
00078
00079
00080 BaseGenerator testBases( numSeqs * seqSize, 1128 );
00081
00082
00083
00084
00085 testBases.generateSubjectFile(seqSize,0);
00086
00087 SequenceReaderFasta testReader("test_subject.fasta",cout);
00088
00089
00090
00091 cout << "Test " << ++numTests <<": test of function getNextSequence"
00092 << endl << endl;
00093
00094 HashTable testHash(cout,"test_save");
00095 HashTableFactory creator(cout);
00096
00097
00098
00099 assert(testHash.isInitialized() == false );
00100
00101 creator.createHashTable(testHash,testReader,wordLength,maxHits);
00102
00103
00104 assert(testHash.isInitialized() == true );
00105
00106 testReader.rewind();
00107
00108 HitListVector hits;
00109
00110 WordSequence seq;
00111
00112 string name1, name2, name3;
00113
00114
00115
00116
00117
00118
00119 for ( int i(1) ; i <= numSeqs ; i++ )
00120 {
00121 testReader.getNextSequence(seq,wordLength);
00122
00123
00124 testReader.getLastSequenceName(name1);
00125 testHash.getSequenceName(name2,i);
00126
00127
00128 name3 = (string) testHash.getSequenceName(i);
00129 assert(name1 == name2 );
00130 assert(name2 == name3 );
00131 cout << seq.getNumBasesInLast() << "!!\n";
00132
00133
00134 for ( int j(0) ; j < (seqSize/wordLength) ; j++ )
00135 {
00136 cout << j << " " << printBase(seq[j],wordLength) << endl;
00137
00138 assert((seq[j]&gCursedWord)==0);
00139 testHash.matchWord(seq[j],hits,0);
00140
00141 cout << hits.size() <<endl;
00142 assert(hits.size() == 1);
00143
00144
00145
00146
00147
00148 assert(hits[0].subjectNum == i );
00149 assert(hits[0].diff == j*wordLength );
00150 assert(hits[0].queryPos == 1);
00151
00152 hits.clear();
00153
00154 }
00155
00156
00157 testHash.matchWord(seq,hits);
00158 cout << hits.size() << "!!!!!\n";
00159 assert( hits.size() == (seqSize/wordLength) );
00160 for ( int j(0) ; j < hits.size() ; j ++ )
00161 {
00162
00163 assert(hits[j].subjectNum == i);
00164 assert(hits[j].diff == 0);
00165 assert(hits[j].queryPos == 1 + ( j * wordLength ) );
00166
00167 }
00168
00169 hits.clear();
00170 seq.clear();
00171 }
00172
00173
00174 cout << "Test passed!" << endl << endl;
00175
00176
00177
00178 cout << "Test " << ++numTests <<": test of save and load of hash table"
00179 << endl << endl;
00180
00181
00182
00183
00184 creator.saveHashTable(testHash);
00185
00186
00187 HashTable testLoad(cout,"test_save");
00188
00189
00190
00191 creator.loadHashTable(testLoad);
00192
00193
00194 assert( testLoad.isInitialized() == true );
00195
00196
00197 assert( testLoad.getWordLength() == wordLength );
00198
00199 testLoad.setMaxNumHits( testHash.getMaxNumHits() );
00200
00201 assert( testHash.getMaxNumHits()==testLoad.getMaxNumHits() );
00202
00203 HitListVector hitsOrig, hitsCopy;
00204
00205 testReader.rewind();
00206
00207
00208
00209
00210
00211 for ( int i(0) ; i < numSeqs ; i++ )
00212 {
00213
00214
00215 testHash.getSequenceName(name1,i+1);
00216 testLoad.getSequenceName(name2,i+1);
00217 assert(name1 == name2 );
00218 assert(name1 == (string)testHash.getSequenceName(i+1));
00219 assert(name1 == (string)testLoad.getSequenceName(i+1));
00220
00221
00222
00223
00224 assert( testHash.getSequenceSize(i+1) == testLoad.getSequenceSize(i+1) );
00225
00226
00227
00228
00229 seq.clear();
00230 testReader.getNextSequence(seq,wordLength);
00231 testHash.matchWord(seq,hitsOrig);
00232 testLoad.matchWord(seq,hitsCopy);
00233 }
00234
00235
00236
00237
00238
00239
00240
00241 assert(hitsOrig.size() == hitsCopy.size() );
00242 assert(hitsOrig == hitsCopy );
00243
00244 cout << "Test passed!" << endl << endl;
00245
00246
00247
00248 cout << "Test " << ++numTests <<": further test of function matchWord"
00249 << endl << endl;
00250
00251
00252 string testSeq =
00253 "AGCTGGCTAGTCTAACGTGTCTGTAGATCGTACGTGCATGCATGGGCAGTCTCTGTATCACTGGGTGGCC";
00254
00255
00256 SequenceReaderString shiftReader(testSeq,cout);
00257
00258 HashTable shiftHash(cout);
00259 creator.createHashTable(shiftHash,shiftReader,wordLength,maxHits);
00260
00261 for ( int i(0) ; i < wordLength ; i++ )
00262 {
00263 testSeq = testSeq.substr(1);
00264 {
00265 SequenceReaderString queryReader(testSeq,cout);
00266 seq.clear();
00267 hits.clear();
00268 queryReader.getNextSequence(seq,wordLength);
00269 shiftHash.matchWord(seq,hits);
00270
00271 if (i==(wordLength-1))
00272 assert( hits.size() == seq.size() - 1 );
00273 else assert( hits.size() == 0 );
00274 }
00275 }
00276
00277 cout << "Test passed!" << endl << endl;
00278
00279
00280
00281 cout << "Test " << ++numTests
00282 <<": test that only limited number of hits are stored"
00283 << endl << endl;
00284
00285 string tenAs = "AAAAAAAAAA";
00286 testSeq = "";
00287 for ( int i(0) ; i < 50 ; i++ ) testSeq += tenAs;
00288
00289
00290
00291
00292
00293 SequenceReaderString reader(testSeq,cout);
00294 HashTable hash(cout);
00295 creator.createHashTable(hash,reader,10,20);
00296 assert(hash.getMaxNumHits()==20);
00297
00298 hits.clear();
00299
00300 Word allAs(0);
00301
00302 hash.matchWord(allAs,hits);
00303
00304 assert(hits.size()==0);
00305 cout << hash.size(allAs) << endl;
00306
00307 hash.setMaxNumHits(50);
00308 assert(hash.getMaxNumHits()==50);
00309 hash.matchWord(allAs,hits);
00310 assert(hits.size()==50);
00311
00312 cout << "Test passed!" << endl << endl;
00313
00314
00315
00316 cout << "Test " << ++numTests
00317 <<": test of tandem repeat masking\n\n";
00318
00319
00320
00321 string repeats[] =
00322 {
00323 "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG",
00324 "GCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG",
00325 "AGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCA",
00326 "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG",
00327 "ATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGATCAGA"
00328 };
00329
00330 for ( int j(1) ; j <= 5 ; ++j )
00331 {
00332
00333 SequenceReaderString readerRepeats(repeats[j-1],cout);
00334 HashTable hashRepeats(cout);
00335 creator.createHashTable(hashRepeats,readerRepeats,10,20);
00336
00337 readerRepeats.rewind();
00338
00339 WordSequence seqRepeats;
00340
00341 readerRepeats.getNextSequence( seqRepeats,10 );
00342
00343 HitListVector hlFwd, hlRev;
00344
00345
00346
00347
00348 cout << "hello\n";
00349
00350 hashRepeats.setNumRepeats(j);
00351
00352 hashRepeats.matchSequence( seqRepeats, hlFwd );
00353
00354 WordSequence rc;
00355
00356 reverseComplement( seqRepeats, rc, 10 );
00357
00358
00359 hashRepeats.matchSequence( rc, hlRev );
00360
00361
00362 cout << "Repeats masked: " << j
00363 << "\nHits found in forward direction: " << hlFwd.size()
00364 << "\nHits found in reverse direction: " << hlRev.size() << "\n";
00365
00366 assert ( hlFwd.size() == 9 );
00367
00368 if ( ( j % 2 ) == 0 ) assert( hlRev.size() == 9 );
00369 else assert( hlRev.size() == 0 );
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381 }
00382
00383 for ( int j(1) ; j <= 5 ; ++j )
00384 {
00385
00386 SequenceReaderString readerRepeats(repeats[j-1],cout);
00387 HashTablePacked hashRepeats(cout);
00388 creator.createHashTable(hashRepeats,readerRepeats,10,20);
00389
00390 readerRepeats.rewind();
00391
00392 WordSequence seqRepeats;
00393
00394 readerRepeats.getNextSequence( seqRepeats,10 );
00395
00396 HitListVector hlFwd, hlRev;
00397
00398
00399
00400
00401 cout << "hello\n";
00402
00403 hashRepeats.setNumRepeats(j);
00404
00405 hashRepeats.matchSequence( seqRepeats, hlFwd );
00406
00407 WordSequence rc;
00408
00409 reverseComplement( seqRepeats, rc, 10 );
00410
00411
00412 hashRepeats.matchSequence( rc, hlRev );
00413
00414
00415 cout << "Repeats masked: " << j
00416 << "\nHits found in forward direction: " << hlFwd.size()
00417 << "\nHits found in reverse direction: " << hlRev.size() << "\n";
00418
00419 assert ( hlFwd.size() == 9 );
00420
00421 if ( ( j % 2 ) == 0 ) assert( hlRev.size() == 9 );
00422 else assert( hlRev.size() == 0 );
00423
00424
00425 }
00426
00427
00428 cout << "Test passed!" << endl << endl;
00429
00430
00431
00432 cout << "Test " << ++numTests
00433 <<": test of HashTableTranslated\n\n";
00434 {
00435 SequenceReaderFasta testReader( "test.fasta" );
00436
00437 int wordLength(4);
00438
00439 HashTableTranslated hashTrans(cout,"test_trans");
00440
00441 hashTrans.createHashTable( testReader, wordLength, 10000 );
00442 assert(hashTrans.isInitialized());
00443
00444 hashTrans.saveHashTable();
00445
00446 HashTableTranslated hashTrans2(cout,"test_trans");
00447 hashTrans2.loadHashTable();
00448 assert( hashTrans2.isInitialized() );
00449 assert( hashTrans.getNumSequences() == hashTrans2.getNumSequences() );
00450 string s1,s2;
00451
00452 cout << hashTrans.getMaxNumHits() << " HH " << hashTrans2.getMaxNumHits() << endl;
00453
00454
00455 testReader.rewind();
00456 HitListVector h1, h2;
00457 WordSequence w1,w2;
00458 WordSequence t1,t2;
00459 CodonList codons;
00460 SequenceEncoderCodon encoder;
00461 encoder.setWordLength(wordLength);
00462
00463 for ( int i(1) ; i < hashTrans.getNumSequences() ; i++ )
00464 {
00465
00466 hashTrans.getSequenceName( s1, i );
00467 hashTrans2.getSequenceName( s2, i );
00468 assert (s1 == s2 );
00469 assert (s1 == (string) hashTrans.getSequenceName(i) );
00470 assert (s1 == (string) hashTrans2.getSequenceName(i) );
00471
00472 h1.clear(); h2.clear(); w1.clear(); w2.clear();
00473 testReader.getNextSequence( w1, gMaxBasesPerWord );
00474 cout << printWord( w1, wordLength ) << endl;
00475 w2=w1;
00476
00477 assert(hashTrans.getSequenceSize(i)==hashTrans2.getSequenceSize(i));
00478
00479 assert( (((w1.size()-1) * gMaxBasesPerWord ) + w1.getNumBasesInLast())
00480 == hashTrans.getSequenceSize(i));
00481
00482
00483
00484
00485 hashTrans.setForward(); assert(hashTrans.isForward());
00486 hashTrans2.setForward(); assert(hashTrans2.isForward());
00487
00488 hashTrans.setQueryTranslatedDNA();
00489 hashTrans2.setQueryTranslatedDNA();
00490 hashTrans.matchSequence( w1, h1 );
00491 hashTrans2.matchSequence( w2, h2 );
00492 assert(h1.size()!=0);
00493 assert(h1.size()==h2.size());
00494 assert(h1==h2);
00495 assert(w1==w2);
00496
00497
00498 hashTrans.setQueryProtein();
00499 hashTrans2.setQueryProtein();
00500 for ( int rf(0) ; rf < gNumReadingFrames ; rf++ )
00501 {
00502 codons.clear();
00503 codonize( w1, codons, rf );
00504 encoder.linkSeq(t1);
00505 encoder.encode( codons );
00506 encoder.unlinkSeq();
00507
00508 t2=t1;
00509 h1.clear();h2.clear();
00510 hashTrans.matchSequence( t1, h1 );
00511 hashTrans2.matchSequence( t2, h2 );
00512 assert(h1.size()!=0);
00513 assert(h1.size()==h2.size());
00514 assert(h1==h2);
00515
00516 }
00517
00518
00519
00520
00521 w1.clear();
00522 reverseComplement( w2,w1, gMaxBasesPerWord );
00523 w2=w1;
00524
00525 h1.clear(); h2.clear();
00526 hashTrans.setQueryTranslatedDNA();
00527 hashTrans2.setQueryTranslatedDNA();
00528 hashTrans.setReverse(); assert(!hashTrans.isForward());
00529 hashTrans2.setReverse(); assert(!hashTrans2.isForward());
00530 hashTrans.matchSequenceTranslatedDNA( w1, h1 );
00531 hashTrans2.matchSequenceTranslatedDNA( w2, h2 );
00532 assert(h1.size()!=0);
00533 assert(h1.size()==h2.size());
00534 assert(h1==h2);
00535 assert(w1==w2);
00536
00537
00538 hashTrans.setQueryProtein();
00539 hashTrans2.setQueryProtein();
00540 for ( int rf(0) ; rf < gNumReadingFrames ; rf++ )
00541 {
00542 codons.clear();
00543 codonize( w1, codons, rf );
00544 encoder.linkSeq(t1);
00545 encoder.encode( codons );
00546 encoder.unlinkSeq();
00547
00548 t2=t1;
00549 h1.clear();h2.clear();
00550 hashTrans.matchSequence( t1, h1 );
00551 hashTrans2.matchSequence( t2, h2 );
00552 assert(h1.size()!=0);
00553 assert(h1.size()==h2.size());
00554 assert(h1==h2);
00555
00556 }
00557
00558
00559 }
00560
00561
00562 }
00563
00564 cout << "Test passed!\n\n";
00565
00566
00567
00568
00569 cout << "Test " << ++numTests
00570 <<": test of substitute generation\n\n";
00571
00572 {
00573 string s, s1;
00574 Word w;
00575 vector<Word> subs;
00576 int wl=15;
00577
00578
00579
00580
00581 for (int i(0); i<wl; i++) s+="A";
00582
00583 w = makeBase(s);
00584
00585 generateSubstitutesDNA( w, subs, wl );
00586 assert(subs.size()==wl);
00587
00588 for (int i(0); i<wl ; i++)
00589 {
00590 cout << printWord(subs[i],wl) << endl;
00591 s1=s;
00592 s1[wl-i-1]='G';
00593 assert(makeBase(s1)==subs[i]);
00594 }
00595
00596
00597 s=""; subs.clear();
00598
00599 for (int i(0); i<wl; i++) s+="C";
00600
00601 w = makeBase(s);
00602
00603 generateSubstitutesDNA( w, subs, wl );
00604 assert(subs.size()==wl);
00605
00606 for (int i(0); i<wl ; i++)
00607 {
00608 cout << printWord(subs[i],wl) << endl;
00609 s1=s;
00610 s1[wl-i-1]='T';
00611 assert(makeBase(s1)==subs[i]);
00612 }
00613
00614
00615 s=""; subs.clear();
00616
00617 for (int i(0); i<wl; i++) s+="G";
00618
00619 w = makeBase(s);
00620
00621 generateSubstitutesDNA( w, subs, wl );
00622 assert(subs.size()==wl);
00623
00624 for (int i(0); i<wl ; i++)
00625 {
00626 cout << printWord(subs[i],wl) << endl;
00627 s1=s;
00628 s1[wl-i-1]='A';
00629 assert(makeBase(s1)==subs[i]);
00630 }
00631
00632
00633 s=""; subs.clear();
00634
00635 for (int i(0); i<wl; i++) s+="T";
00636
00637 w = makeBase(s);
00638
00639 generateSubstitutesDNA( w, subs, wl );
00640 assert(subs.size()==wl);
00641
00642 for (int i(0); i<wl ; i++)
00643 {
00644 cout << printWord(subs[i],wl) << endl;
00645 s1=s;
00646 s1[wl-i-1]='C';
00647 assert(makeBase(s1)==subs[i]);
00648 }
00649
00650
00651 for (int i(0); i<gNumCodonEncodings; i++)
00652 {
00653 s=""; s+= gResidueNames[i];
00654 w = makeResidue(s);
00655 subs.clear();
00656 generateSubstitutesProtein( w, subs, 1 );
00657 cout << "Residue: " << s << " substitutes: ";
00658 for (vector<Word>::iterator j(subs.begin());j!=subs.end();j++)
00659 cout << printResidue( *j, 1 ) << " ";
00660 cout << endl;
00661 }
00662
00663 s="*CPXY";
00664 w=makeResidue(s);
00665 subs.clear();
00666 generateSubstitutesProtein( w, subs, 5 );
00667 for (vector<Word>::iterator j(subs.begin());j!=subs.end();j++)
00668 cout << printResidue( *j, 5 ) << endl;
00669 assert(subs.size()==3);
00670
00671 s="EIKLMN";
00672 w=makeResidue(s);
00673 subs.clear();
00674 generateSubstitutesProtein( w, subs, 6 );
00675 for (vector<Word>::iterator j(subs.begin());j!=subs.end();j++)
00676 cout << printResidue( *j, 6 ) << endl;
00677 assert(subs.size()==18);
00678
00679
00680 assert(subVals[subStarts[gNumCodonEncodings]]==99999);
00681
00682
00683 {
00684 string orig =
00685 "ttcgtgacaggcagttcaggtcacgtggtgacttgatgacccattgtcaaatgtccaattgtta";
00686 string subd =
00687 "CtcgtgacaAgcagttcaAgtcacgtgAtgacttgaCgacccattAtcaaatgtTcaattgttG";
00688
00689
00690 SequenceReaderString origReader(orig,cout);
00691 SequenceReaderString subdReader(subd,cout);
00692
00693 HashTablePacked origHash(cout);
00694 creator.createHashTable(origHash,origReader,8,100000);
00695
00696 WordSequence subdSeq, seq;
00697
00698 subdReader.getNextSequence( subdSeq,8 );
00699
00700 HitListVector hl;
00701
00702 seq=subdSeq;
00703 origHash.matchSequence( seq, hl );
00704 assert(hl.size()==0);
00705
00706 origHash.setSubstituteThreshold(1);
00707
00708 seq=subdSeq;
00709 origHash.matchSequence( seq, hl );
00710 for (HitListVector::iterator i(hl.begin()); i!=hl.end();i++)
00711 cout << i->subjectNum << " " << i->diff << " " << i->queryPos << endl;
00712 assert(hl.size()==8);
00713 }
00714
00715 {
00716 string orig =
00717 "LAKAGKKTGPQDAECRSCHNPKPSAASSWKEIGFDKSLHYRHVASKAIKPVGDPQKNCGA";
00718 string subd =
00719 "iAKAGKeTGPQDsECRSCnNPKPSsASSfKEIGFnKtLHYkHVASKAmKPVGDPkKhCGA";
00720
00721
00722 SequenceReaderStringProtein origReader(orig,cout);
00723 SequenceReaderStringProtein subdReader(subd,cout);
00724
00725 HashTablePackedProtein origHash(cout);
00726 creator.createHashTable(origHash,origReader,5,100000);
00727
00728 WordSequence subdSeq, seq;
00729
00730 subdReader.getNextSequence( subdSeq,5 );
00731
00732 HitListVector hl;
00733
00734 seq=subdSeq;
00735 origHash.matchSequence( seq, hl );
00736 assert(hl.size()==0);
00737
00738 origHash.setSubstituteThreshold(1);
00739
00740 seq=subdSeq;
00741 origHash.matchSequence( seq, hl );
00742 for (HitListVector::iterator i(hl.begin()); i!=hl.end();i++)
00743 cout << i->subjectNum << " " << i->diff << " " << i->queryPos << endl;
00744 cout << hl.size() << endl;
00745 assert(hl.size()==12);
00746 }
00747
00748
00749 }
00750 cout << "Test passed!\n";
00751
00752
00753
00754 cout << "Test " << ++numTests
00755 <<": test of flag mode\n\n";
00756 {
00757 string testSub
00758 ="TTTTTTT-TTTTTTT-TTTTTTT-TTTTTTT-TTTTTTT-TTTTTTT-TTTTTTT-TTTTTTT";
00759
00760 string testQuery="TTTNTTT";
00761
00762 HitListVector hl1, hl2;
00763
00764 SequenceReaderString
00765 queryReader(testQuery, cout), subjectReader(testSub,cout);
00766 SequenceReaderModeReplace replace('T');
00767 SequenceReaderModeFlagReplace tag('T');
00768 WordSequence testSeq, seq;
00769 queryReader.changeMode(&replace);
00770 queryReader.getNextSequence( testSeq, 7 );
00771
00772
00773
00774
00775 {
00776 subjectReader.changeMode(&replace);
00777
00778 HashTable hash(cout);
00779 HashTablePacked hashPacked(cout);
00780 creator.createHashTable(hash,subjectReader,7,1000);
00781 creator.createHashTable(hashPacked,subjectReader,7,1000);
00782
00783 seq=testSeq;
00784 hash.matchSequence( seq, hl1 );
00785 for (HitListVector::iterator i(hl1.begin()); i!=hl1.end();i++)
00786 cout << i->subjectNum
00787 << " " << i->diff << " " << i->queryPos << endl;
00788 cout << hl1.size() << endl;
00789 assert(hl1.size()==9);
00790
00791 seq=testSeq;
00792 hashPacked.matchSequence( seq, hl2 );
00793 assert(hl1==hl2);
00794
00795 hl1.clear();
00796 hl2.clear();
00797
00798
00799
00800 queryReader.changeMode(&tag);
00801 queryReader.rewind();
00802 queryReader.getNextSequence( seq, 7 );
00803 hash.matchSequence( seq, hl1 );
00804 hashPacked.matchSequence( seq, hl2 );
00805 assert(hl1.size()==0);
00806 assert(hl2.size()==0);
00807
00808 }
00809 {
00810
00811 subjectReader.changeMode(&tag);
00812 hl1.clear();hl2.clear();
00813
00814 HashTable hash(cout);
00815 HashTablePacked hashPacked(cout);
00816 creator.createHashTable(hash,subjectReader,7,1000);
00817 creator.createHashTable(hashPacked,subjectReader,7,1000);
00818
00819 seq=testSeq;
00820 hash.matchSequence( seq, hl1 );
00821
00822 for (HitListVector::iterator i(hl1.begin()); i!=hl1.end();i++)
00823 cout << i->subjectNum
00824 << " " << i->diff << " " << i->queryPos << endl;
00825 cout << hl1.size() << endl;
00826 assert(hl1.size()==2);
00827
00828 seq=testSeq;
00829 hashPacked.matchSequence( seq, hl2 );
00830
00831 for (HitListVector::iterator i(hl2.begin()); i!=hl2.end();i++)
00832 cout << i->subjectNum
00833 << " " << i->diff << " " << i->queryPos << endl;
00834 cout << hl2.size() << endl;
00835
00836 assert(hl1==hl2);
00837
00838
00839 }
00840
00841
00842 }
00843 cout << "Test passed!" << endl << endl;
00844
00845
00846
00847
00848 cout << "Test " << ++numTests
00849 <<": test of flag mode for translated DNA\n\n";
00850 {
00851
00852 SequenceReaderModeReplace replaceMode('A');
00853 SequenceReaderModeFlagReplace flagMode('A');
00854 string testSeq =
00855 "agtagtagtagtagtagtagtagtagtagtXgtagtagtagtagtagtagtagtagtagtag";
00856
00857
00858
00859 SequenceReaderString subjectReader(testSeq, cout);
00860
00861 HashTableTranslatedPeek tableUnflagged, tableFlagged;
00862
00863 HashTableComponent& peekUnflaggedFwd(tableUnflagged.peekFwd());
00864 HashTableComponent& peekUnflaggedRev(tableUnflagged.peekRev());
00865 HashTableComponent& peekFlaggedFwd(tableFlagged.peekFwd());
00866 HashTableComponent& peekFlaggedRev(tableFlagged.peekRev());
00867
00868 cout << "making unflagged" << endl;
00869 subjectReader.changeMode(&replaceMode);
00870 tableUnflagged.createHashTable( subjectReader, 5, 10000 );
00871 cout << endl << " **** making flagged ****" << endl << endl;
00872 subjectReader.changeMode(&flagMode);
00873 tableFlagged.createHashTable( subjectReader, 5, 10000 );
00874
00875 string queryString="SSSSSSSSS";
00876
00877 SequenceReaderStringProtein queryReader(queryString,cout);
00878 WordSequence w1, w2;
00879 HitListVector h1, h2;
00880 queryReader.getNextSequence(w1,5);
00881 assert(w1.size()==2);
00882
00883 w2=w1;
00884
00885 cout << "hits for unflagged" << endl;
00886
00887 tableUnflagged.matchSequenceProtein( w1, h1);
00888 for (HitListVector::iterator i(h1.begin()); i!=h1.end(); i++)
00889 cout << i->subjectNum << " " << i->diff << " " << i->queryPos << endl;
00890
00891 cout << "hits for flagged" << endl;
00892
00893 tableFlagged.matchSequenceProtein( w2, h2);
00894 for (HitListVector::iterator i(h2.begin()); i!=h2.end(); i++)
00895 cout << i->subjectNum << " " << i->diff << " " << i->queryPos << endl;
00896
00897
00898
00899
00900 Word w = makeResidue((string)"SSSSS");
00901 PackedHitStore phu, phf;
00902 HitListVector hu, hf;
00903
00904 cout << "matching unflagged" << endl;
00905 peekUnflaggedFwd.matchWordStandard(w, phu, 0);
00906 peekUnflaggedFwd.convertHits( phu, hu );
00907
00908 for (HitListVector::iterator i(hu.begin()); i!=hu.end(); i++)
00909 cout << i->subjectNum << " " << i->diff << " " << i->queryPos<< endl;
00910
00911 cout << "matching flagged" << endl;
00912 peekFlaggedFwd.matchWord(w, phf, 0);
00913 cout << "converting hits" << endl;
00914 peekFlaggedFwd.convertHits( phf, hf );
00915
00916 for (HitListVector::iterator i(hf.begin()); i!=hf.end(); i++)
00917 cout << i->subjectNum << " " << i->diff << " " << i->queryPos << endl;
00918
00919 hu.clear(); hf.clear(); phu.clear(); phf.clear();
00920
00921 w=makeResidue((string)"YYYYY");
00922
00923
00924 cout << "matching unflagged" << endl;
00925 peekUnflaggedRev.matchWordStandard(w, phu, 0);
00926 peekUnflaggedRev.convertHits( phu, hu );
00927
00928 for (HitListVector::iterator i(hu.begin()); i!=hu.end(); i++)
00929 cout << i->subjectNum << " " << i->diff << " " << i->queryPos<< endl;
00930
00931 cout << "matching flagged" << endl;
00932 peekFlaggedRev.matchWord(w, phf, 0);
00933 cout << "converting hits" << endl;
00934 peekFlaggedRev.convertHits( phf, hf );
00935
00936 for (HitListVector::iterator i(hf.begin()); i!=hf.end(); i++)
00937 cout << i->subjectNum << " " << i->diff << " " << i->queryPos << endl;
00938
00939
00940 }
00941 cout << "Test passed!\n";
00942
00943
00944
00945 return (0);
00946
00947 }
00948 catch (const SSAHAException& err )
00949 {
00950 cout << "Caught SSAHA exception: " << err.what() << "\n";
00951 exit(1);
00952 }
00953 catch (const std::exception& err )
00954 {
00955 cout << "Caught exception: " << err.what() << "\n";
00956 exit(1);
00957 }
00958
00959
00960 }
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974