00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <exception>
00034 #include "SequenceEncoder.h"
00035 #include "SequenceReader.h"
00036 #include "SequenceReaderFasta.h"
00037 #include "SequenceReaderFastq.h"
00038 #include "SequenceReaderString.h"
00039 #include "SequenceReaderMulti.h"
00040 #include "SequenceReaderLocal.h"
00041 #include "SequenceReaderFilter.h"
00042
00043 #include "GenerateTestFastaFiles.h"
00044 #include "assert.h"
00045 #include <string>
00046 #include <strstream>
00047 #include <iostream>
00048
00049
00050 void capitalise( string& s )
00051 {
00052 for ( int i(0) ; i < s.size() ; ++i ) s[i] = toupper(s[i]);
00053 }
00054 void reverseString( string& seq )
00055 {
00056 string rc;
00057 for ( int i(0) ; i < seq.size() ; i++ )
00058 {
00059 if ( ( seq[i] == 'A' ) || (seq[i] == 'a') ) rc = 'T' + rc;
00060 else if ( ( seq[i] == 'T' ) || (seq[i] == 't') ) rc = 'A' + rc;
00061 else if ( ( seq[i] == 'G' ) || (seq[i] == 'g') ) rc = 'C' + rc;
00062 else if ( ( seq[i] == 'C' ) || (seq[i] == 'c') ) rc = 'G' + rc;
00063 else throw SSAHAException();
00064 }
00065
00066 seq = rc;
00067
00068 }
00069
00070
00071
00072
00073 int main( void )
00074 {
00075 try
00076 {
00077
00078 int numTests = 0;
00079
00080 cout << "*******************************************" << endl << endl;
00081 cout << "Test of functions in SequenceReader modules" << endl << endl;
00082 cout << "*******************************************" << endl << endl;
00083
00084 const int wordLength(10);
00085 std::ostrstream buffer;
00086 string s1,s2,s3,s4,s5;
00087
00088
00089
00090 cout << " --- Tests of member functions of SequenceReaderFasta ---\n\n";
00091 cout << "Test " << ++numTests <<": test of function getNextSequence"
00092 << endl << endl;
00093
00094 int numSeqs = 10;
00095 int seqSize = 57;
00096
00097
00098 BaseGenerator testBases( numSeqs * seqSize );
00099
00100
00101
00102
00103 testBases.generateSubjectFile(seqSize,0);
00104
00105 SequenceReaderFasta testReader("test_subject.fasta",cout);
00106 WordSequence w;
00107 string expected, actual, name;
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117 const string spareBases("");
00118
00119
00120
00121 for ( int i(0) ; i < numSeqs ; i++ )
00122 {
00123
00124 testReader.getNextSequence(w,wordLength);
00125 buffer << printWord(w,wordLength) << ends;
00126 actual=buffer.str();
00127 buffer.freeze(false);
00128 buffer.seekp(0,ios::beg);
00129
00130 testBases.getBases(i*seqSize,seqSize,expected);
00131
00132 expected += spareBases;
00133
00134 testReader.getLastSequenceName(name);
00135 cout << "SequenceName : " << name << endl;
00136 cout << "Expected sequence: " << expected << endl;
00137 cout << "Actual sequence : " << actual << endl;
00138
00139 assert(expected == actual);
00140
00141 w.clear();
00142
00143 }
00144
00145 cout << "Test passed!" << endl << endl;
00146
00147
00148
00149 cout << "Test " << ++numTests <<": test of sequence numbering"
00150 << endl << endl;
00151
00152
00153 cout << "Last sequence number read: " << testReader.getLastSequenceNumber()
00154 << endl;
00155
00156 assert( testReader.getLastSequenceNumber() == numSeqs );
00157
00158 int numInLast = testReader.getNextSequence( w, wordLength );
00159
00160 cout << "getNextSequence returned " << numInLast
00161 << ": should be -1 because all sequences have been read." << endl;
00162
00163 assert( numInLast == -1 );
00164
00165 cout << "Number of sequences in file: " << testReader.getNumSequencesInFile()
00166 << endl;
00167
00168 assert( testReader.getNumSequencesInFile() == numSeqs );
00169
00170 cout << "Rewinding to start of data ..." << endl;
00171
00172 testReader.rewind();
00173
00174 cout << "Last sequence number read: " << testReader.getLastSequenceNumber()
00175 << endl;
00176
00177 assert( testReader.getLastSequenceNumber() == 0 );
00178
00179 cout << "Checking getBitsPerSymbol returns 2" << endl;
00180
00181 assert( testReader.getBitsPerSymbol()==2 );
00182
00183 cout << "Test passed!" << endl << endl;
00184
00185
00186
00187
00188 cout << "Test " << ++numTests <<": test of getSequence"
00189 << endl << endl;
00190
00191 int toRead = 5;
00192
00193 w.clear();
00194 testReader.getSequence(w,toRead,wordLength);
00195 buffer << printWord(w,wordLength) << ends;
00196 actual=buffer.str();
00197 buffer.freeze(false);
00198 buffer.seekp(0,ios::beg);
00199
00200 testBases.getBases((toRead-1)*seqSize,seqSize,expected);
00201
00202 expected += spareBases;
00203
00204 cout << "Expected sequence: " << expected << endl;
00205 cout << "Actual sequence : " << actual << endl;
00206
00207 assert(expected == actual);
00208
00209 cout << toRead << " " << testReader.getLastSequenceNumber() << endl;
00210 assert(testReader.getLastSequenceNumber() == toRead );
00211
00212 w.clear();
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229 cout << "Passing invalid sequence number to getSequence ..." << endl;
00230
00231 numInLast = testReader.getSequence( w, 987, wordLength );
00232
00233 cout << "getNextSequence returned " << numInLast
00234 << ": should be -1 because sequence number is invalid." << endl;
00235
00236 assert( numInLast == -1 );
00237
00238 cout << "Test passed!" << endl << endl;
00239
00240
00241
00242 cout << "Test " << ++numTests <<": test of random access output functions"
00243 << endl << endl;
00244
00245 for ( int i(1) ; i <= numSeqs; i++ )
00246 {
00247 cout << testReader.getName(i) << endl;
00248 cout << testReader.getSideInfo(i) << endl;
00249 cout << testReader.getSource(i) << endl;
00250 }
00251
00252
00253
00254 cout << " --- Tests of member functions of SequenceReaderFastq ---\n\n";
00255 cout << "Test " << ++numTests <<": test of function getNextSequence"
00256 << endl << endl;
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267 testBases.generateSubjectFileFastq(seqSize,0);
00268
00269 SequenceReaderFastq testReaderFastq("test_subject.fastq",cout);
00270
00271
00272
00273
00274
00275
00276
00277
00278 for ( int i(0) ; i < numSeqs ; i++ )
00279 {
00280
00281 testReaderFastq.getNextSequence(w,wordLength);
00282 buffer << printWord(w,wordLength) << ends;
00283 actual=buffer.str();
00284 buffer.freeze(false);
00285 buffer.seekp(0,ios::beg);
00286
00287 testBases.getBases(i*seqSize,seqSize,expected);
00288
00289 expected += spareBases;
00290
00291 testReaderFastq.getLastSequenceName(name);
00292 cout << "SequenceName : " << name << endl;
00293 cout << "Expected sequence: " << expected << endl;
00294 cout << "Actual sequence : " << actual << endl;
00295
00296 assert(expected == actual);
00297
00298 w.clear();
00299
00300 }
00301
00302 cout << "Test passed!" << endl << endl;
00303
00304
00305
00306 cout << "Test " << ++numTests <<": test of sequence numbering"
00307 << endl << endl;
00308
00309
00310 cout << "Last sequence number read: " << testReaderFastq.getLastSequenceNumber()
00311 << endl;
00312
00313 assert( testReaderFastq.getLastSequenceNumber() == numSeqs );
00314
00315 numInLast = testReaderFastq.getNextSequence( w, wordLength );
00316
00317 cout << "getNextSequence returned " << numInLast
00318 << ": should be -1 because all sequences have been read." << endl;
00319
00320 assert( numInLast == -1 );
00321
00322 cout << "Number of sequences in file: " << testReaderFastq.getNumSequencesInFile()
00323 << endl;
00324
00325 assert( testReaderFastq.getNumSequencesInFile() == numSeqs );
00326
00327 cout << "Rewinding to start of data ..." << endl;
00328
00329 testReaderFastq.rewind();
00330
00331 cout << "Last sequence number read: " << testReaderFastq.getLastSequenceNumber()
00332 << endl;
00333
00334 assert( testReaderFastq.getLastSequenceNumber() == 0 );
00335
00336 cout << "Test passed!" << endl << endl;
00337
00338 cout << "Checking getBitsPerSymbol returns 2" << endl;
00339
00340 assert( testReader.getBitsPerSymbol()==2 );
00341
00342
00343
00344 cout << "Test " << ++numTests <<": test of getSequence"
00345 << endl << endl;
00346
00347 toRead = 5;
00348
00349 w.clear();
00350 testReaderFastq.getSequence(w,toRead,wordLength);
00351 buffer << printWord(w,wordLength) << ends;
00352 actual=buffer.str();
00353 buffer.freeze(false);
00354 buffer.seekp(0,ios::beg);
00355
00356 testBases.getBases((toRead-1)*seqSize,seqSize,expected);
00357
00358 expected += spareBases;
00359
00360 cout << "Expected sequence: " << expected << endl;
00361 cout << "Actual sequence : " << actual << endl;
00362
00363
00364 assert(expected == actual);
00365
00366 cout << toRead << " " << testReaderFastq.getLastSequenceNumber() << endl;
00367 assert(testReaderFastq.getLastSequenceNumber() == toRead );
00368
00369 w.clear();
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390 cout << "Passing invalid sequence number to getSequence ..." << endl;
00391
00392 numInLast = testReaderFastq.getSequence( w, 987, wordLength );
00393
00394 cout << "getNextSequence returned " << numInLast
00395 << ": should be -1 because sequence number is invalid." << endl;
00396
00397 assert( numInLast == -1 );
00398
00399 cout << "Test passed!" << endl << endl;
00400
00401
00402
00403 cout << "Test " << ++numTests <<": test of random access output functions"
00404 << endl << endl;
00405
00406 for ( int i(1) ; i <= numSeqs; i++ )
00407 {
00408 cout << testReaderFastq.getName(i) << endl;
00409 cout << testReaderFastq.getSideInfo(i) << endl;
00410 cout << testReaderFastq.getSource(i) << endl;
00411 }
00412
00413
00414
00415 cout << "Test " << ++numTests <<": test of SequenceReaderString"
00416 << endl << endl;
00417
00418 expected="ATGCCTGCGTATGTAATGCTGGCATTGGTGCGTTTAATGCTCTCCTGATG";
00419 SequenceReaderString stringReader(expected,cout);
00420 expected+="AAAAAAAAAA";
00421
00422 assert(stringReader.getNumSequencesInFile()==1);
00423
00424 w.clear();
00425 numInLast = stringReader.getNextSequence(w,wordLength);
00426 buffer << printWord(w,wordLength) << ends;
00427 actual=buffer.str();
00428 buffer.freeze(false);
00429 buffer.seekp(0,ios::beg);
00430
00431 cout << "expected: " << expected << endl;
00432 cout << "actual: " << actual << " " << w.size() << " "
00433 << w.getNumBasesInLast() << endl;
00434
00435
00436 assert( numInLast == 0 );
00437 assert( expected == actual );
00438
00439 cout << "Checking getBitsPerSymbol returns 2" << endl;
00440
00441 assert( stringReader.getBitsPerSymbol()==2 );
00442
00443
00444
00445 cout << "Test passed!" << endl << endl;
00446
00447
00448
00449 cout << "Test " << ++numTests <<": test of SequenceReaderMulti"
00450 << endl << endl;
00451
00452 SequenceReaderMulti multiReader(cout);
00453
00454 int numReaders = 4;
00455
00456 for ( int i(0) ; i < numReaders ; i ++ )
00457 multiReader.addReader(testReader.clone());
00458
00459 assert( multiReader.getNumSequencesInFile() ==
00460 ( numReaders * testReader.getNumSequencesInFile() ) );
00461
00462 WordSequence wSingle, wMulti;
00463 multiReader.rewind();
00464
00465 for ( int i(0) ; i < numReaders ; i ++ )
00466 {
00467
00468 testReader.rewind();
00469 for ( int j(0) ; j < testReader.getNumSequencesInFile() ; j ++ )
00470 {
00471 cout << i << " " << j << endl;
00472 wSingle.clear(); wMulti.clear();
00473 assert( multiReader.getBitsPerSymbol()==2 );
00474 assert( testReader.getNextSequence(wSingle, wordLength) ==
00475 multiReader.getNextSequence(wMulti, wordLength) );
00476
00477
00478 testReader.getLastSequenceName(expected);
00479 multiReader.getLastSequenceName(actual);
00480 cout << expected << " " << actual << endl;
00481 assert( expected == actual );
00482
00483 assert( multiReader.getLastSequenceNumber()
00484 == ( i*testReader.getNumSequencesInFile() )
00485 + testReader.getLastSequenceNumber() );
00486 assert ( wSingle == wMulti );
00487 }
00488
00489 }
00490
00491 wMulti.clear();
00492
00493 cout << multiReader.getLastSequenceNumber() << endl;
00494
00495 assert( multiReader.getLastSequenceNumber() ==
00496 ( numReaders * testReader.getNumSequencesInFile() ) );
00497 assert( multiReader.getNextSequence( wMulti,wordLength ) == -1 );
00498
00499 multiReader.rewind();
00500
00501 assert( multiReader.getLastSequenceNumber() == 0 );
00502
00503 cout << "Checking getBitsPerSymbol returns 2" << endl;
00504
00505 assert( multiReader.getBitsPerSymbol()==2 );
00506
00507 cout << "Test passed!" << endl << endl;
00508
00509
00510
00511 cout << "Test " << ++numTests <<": test of make/printBase/Residue"
00512 << endl << endl;
00513
00514 {
00515 Word w1;
00516 string s1;
00517
00518 s1="TaCgTTgtAGc";
00519 w1=makeBase(s1);
00520 buffer << printBase(w1,11) << ends;
00521 actual=buffer.str();
00522 buffer.freeze(false);
00523
00524 buffer.seekp(0,ios::beg);
00525
00526 cout << actual << "\n";
00527 cout << s1 << "\n";
00528 capitalise(s1);
00529 assert( actual == s1 );
00530
00531 s1="ACDEFG";
00532 w1=makeResidue(s1);
00533 buffer << printResidue(w1,6) << ends;
00534 actual=buffer.str();
00535 buffer.freeze(false);
00536
00537 buffer.seekp(0,ios::beg);
00538
00539 cout << actual << "\n";
00540 cout << s1 << "\n";
00541 capitalise(s1);
00542 assert( actual == s1 );
00543
00544 s1="NMLKIH";
00545 w1=makeResidue(s1);
00546 buffer << printResidue(w1,6) << ends;
00547 actual=buffer.str();
00548 buffer.freeze(false);
00549
00550 buffer.seekp(0,ios::beg);
00551
00552 cout << actual << "\n";
00553 cout << s1 << "\n";
00554 capitalise(s1);
00555 assert( actual == s1 );
00556
00557
00558 s1="PRTQSX";
00559 w1=makeResidue(s1);
00560 buffer << printResidue(w1,6) << ends;
00561 actual=buffer.str();
00562 buffer.freeze(false);
00563
00564 buffer.seekp(0,ios::beg);
00565
00566 cout << actual << "\n";
00567 cout << s1 << "\n";
00568 capitalise(s1);
00569 assert( actual == s1 );
00570
00571 s1="VWY***";
00572 w1=makeResidue(s1);
00573 buffer << printResidue(w1,6) << ends;
00574 actual=buffer.str();
00575 buffer.freeze(false);
00576
00577 buffer.seekp(0,ios::beg);
00578
00579 cout << actual << "\n";
00580 cout << s1 << "\n";
00581 capitalise(s1);
00582 assert( actual == s1 );
00583
00584 s1="acdefg";
00585 w1=makeResidue(s1);
00586 buffer << printResidue(w1,6) << ends;
00587 actual=buffer.str();
00588 buffer.freeze(false);
00589
00590 buffer.seekp(0,ios::beg);
00591
00592 cout << actual << "\n";
00593 cout << s1 << "\n";
00594 capitalise(s1);
00595 assert( actual == s1 );
00596
00597 s1="nmlkih";
00598 w1=makeResidue(s1);
00599 buffer << printResidue(w1,6) << ends;
00600 actual=buffer.str();
00601 buffer.freeze(false);
00602
00603 buffer.seekp(0,ios::beg);
00604
00605 cout << actual << "\n";
00606 cout << s1 << "\n";
00607 capitalise(s1);
00608 assert( actual == s1 );
00609
00610
00611 s1="prtqsx";
00612 w1=makeResidue(s1);
00613 buffer << printResidue(w1,6) << ends;
00614 actual=buffer.str();
00615 buffer.freeze(false);
00616
00617 buffer.seekp(0,ios::beg);
00618
00619 cout << actual << "\n";
00620 cout << s1 << "\n";
00621 capitalise(s1);
00622 assert( actual == s1 );
00623
00624 s1="vwy***";
00625 w1=makeResidue(s1);
00626 buffer << printResidue(w1,6) << ends;
00627 actual=buffer.str();
00628 buffer.freeze(false);
00629
00630 buffer.seekp(0,ios::beg);
00631
00632 cout << actual << "\n";
00633 cout << s1 << "\n";
00634 capitalise(s1);
00635 assert( actual == s1 );
00636
00637
00638 }
00639 cout << "Test passed!" << endl << endl;
00640
00641
00642
00643
00644 cout << "Test " << ++numTests <<": test of SequenceEncoder"
00645 << endl << endl;
00646 {
00647 WordSequence W;
00648 SequenceEncoderDNA encoder(12);
00649
00650 cout << "Checking getBitsPerSymbol returns 2" << endl;
00651
00652 assert( encoder.getBitsPerSymbol()==2 );
00653
00654 cout << "Checking mode changes...\n";
00655
00656 string s1="nOne@rEv@lID";
00657
00658 cout << "... by default, should ignore invalid characters\n";
00659 encoder.linkSeq(W);
00660 encoder.encode(s1);
00661 encoder.unlinkSeq();
00662 cout << W.size() << " " << W[0] << endl;
00663
00664 assert(W.size()==1);
00665 assert(W[0]==0);
00666
00667 cout << "...changing mode to replace invalid chars with 'G'\n";
00668 SequenceReaderModeReportReplace mode1('G');
00669
00670 encoder.changeMode( &mode1 );
00671 encoder.linkSeq(W);
00672 encoder.encode(s1);
00673 encoder.unlinkSeq();
00674 cout << W.size() << " " << printBase(W[0],12) << endl;
00675 assert(W.size()==2);
00676 assert(W[0]==makeBase("GGGGGGGGGGGG"));
00677
00678 cout << "...checking mode changes are passed when copied\n";
00679 W.clear();
00680 SequenceEncoder copy(encoder);
00681
00682 copy.linkSeq(W);
00683 copy.encode(s1);
00684 copy.unlinkSeq();
00685 cout << W.size() << " " << printBase(W[0],12) << endl;
00686 assert(W.size()==2);
00687 assert(W[0]==makeBase("GGGGGGGGGGGG"));
00688
00689 cout << "...changing copy to 'ignore' mode\n";
00690 W.clear();
00691 SequenceReaderModeIgnore mode2;
00692
00693 copy.changeMode( &mode2 );
00694 copy.linkSeq(W);
00695 copy.encode(s1);
00696 copy.unlinkSeq();
00697
00698 assert(W.size()==1);
00699 assert(W[0]==0);
00700
00701
00702
00703
00704 }
00705
00706 cout << "Test passed!" << endl << endl;
00707
00708
00709
00710
00711 cout << "Test " << ++numTests <<": test of SequenceEncoderDNA"
00712 << endl << endl;
00713
00714 {
00715
00716
00717
00718
00719
00720
00721
00722 int wordLength(12);
00723
00724 SequenceEncoderDNA encoder(wordLength);
00725
00726 string s1="AGCcGTGGGTTTG", s2="TTGgGTTGGGGAC", s3="AGCTGaTTTGCAAGtCAA";
00727
00728 WordSequence W;
00729
00730 encoder.linkSeq(W);
00731 encoder.encode(s1);
00732 encoder.encode(s2);
00733 encoder.encode(s3);
00734 encoder.unlinkSeq();
00735 buffer << printBase(W,wordLength) << ends;
00736 actual=buffer.str();
00737 buffer.freeze(false);
00738
00739 buffer.seekp(0,ios::beg);
00740
00741 cout << printBase(W,wordLength) << "\n";
00742 cout << s1+s2+s3 << "\n";
00743
00744 expected = s1+s2+s3;
00745 capitalise(expected);
00746 cout << actual << actual.size() << endl << expected << expected.size() << endl;
00747
00748 assert(actual==expected);
00749
00750 W.clear();
00751
00752 }
00753 cout << "Test passed!" << endl << endl;
00754
00755
00756
00757 cout << "Test " << ++numTests <<": additional tests of SequenceEncoderDNA"
00758 << endl << endl;
00759
00760 {
00761
00762 string fiftyAs="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
00763 string thisString;
00764 WordSequence w;
00765 int numInLast;
00766 SequenceReaderModeIgnore ignore;
00767 SequenceReaderModeReplace replace('T');
00768 SequenceReaderModeFlagReplace tag('T');
00769 int dudPos;
00770 Word dudValue;
00771
00772
00773 for (int wordLength(9); wordLength <= 12; wordLength++)
00774 {
00775 for (int i(0) ; i < fiftyAs.size(); i++)
00776 {
00777 thisString=fiftyAs;
00778 thisString[i]='X';
00779 w.clear();
00780 dudPos=i/wordLength;
00781 dudValue=3<<(2*(wordLength-(i%wordLength)-1));
00782 cout << "dud " << wordLength << " " << i << " "
00783 << dudPos << " " << printWord(dudValue, wordLength) << endl;
00784
00785 SequenceReaderString stringReader(thisString,cout);
00786
00787
00788 assert(stringReader.getNumSequencesInFile()==1);
00789
00790
00791 cout << "testing ignore mode" << endl;
00792
00793 stringReader.changeMode(&ignore);
00794 w.clear();
00795 numInLast = stringReader.getNextSequence(w,wordLength);
00796 assert(numInLast==(fiftyAs.size()-1)%wordLength);
00797 assert((w.size()-1)*wordLength+numInLast==fiftyAs.size()-1);
00798
00799 for (WordSequence::iterator j(w.begin()); j!=w.end();j++)
00800 {
00801 assert (*j==0);
00802 }
00803
00804
00805 cout << "testing replace mode" << endl;
00806
00807 stringReader.rewind();
00808 stringReader.changeMode(&replace);
00809 w.clear();
00810 numInLast = stringReader.getNextSequence(w,wordLength);
00811 cout << "blegh " << wordLength << " " << i << " " << fiftyAs.size() << " " << numInLast << " " << w.size() << endl;
00812
00813 assert(numInLast==fiftyAs.size()%wordLength);
00814 assert((w.size()-1)*wordLength+numInLast==fiftyAs.size());
00815
00816 for (WordSequence::iterator j(w.begin()); j!=w.end();j++)
00817 {
00818 cout << wordLength << " " << i << " " << (j-w.begin()) << " "
00819 << *j << " " << gCursedWord << endl;
00820 if (j-w.begin()==dudPos) assert (*j==dudValue);
00821 else assert (*j==0);
00822 }
00823
00824
00825 cout << "testing flag mode" << endl;
00826
00827 stringReader.rewind();
00828 stringReader.changeMode(&tag);
00829 w.clear();
00830 numInLast = stringReader.getNextSequence(w,wordLength);
00831 assert(numInLast==fiftyAs.size()%wordLength);
00832 assert((w.size()-1)*wordLength+numInLast==fiftyAs.size());
00833
00834 for (WordSequence::iterator j(w.begin()); j!=w.end();j++)
00835 {
00836 cout << wordLength << " " << i << " " << (j-w.begin()) << " "
00837 << *j << " " << gCursedWord << endl;
00838 if (j-w.begin()==dudPos) assert (*j==dudValue|gCursedWord);
00839 else assert (*j==0);
00840 }
00841
00842
00843
00844 }
00845
00846
00847 }
00848
00849
00850
00851
00852
00853
00854
00855
00856 int wordLength(12);
00857
00858 SequenceEncoderDNA encoder(wordLength);
00859
00860 string s1="AGCcGTGGGTTTG", s2="TTGgGTTGGGGAC", s3="AGCTGaTTTGCAAGtCAA";
00861
00862 WordSequence W;
00863
00864 encoder.linkSeq(W);
00865 encoder.encode(s1);
00866 encoder.encode(s2);
00867 encoder.encode(s3);
00868 encoder.unlinkSeq();
00869 buffer << printBase(W,wordLength) << ends;
00870 actual=buffer.str();
00871 buffer.freeze(false);
00872
00873 buffer.seekp(0,ios::beg);
00874
00875 cout << printBase(W,wordLength) << "\n";
00876 cout << s1+s2+s3 << "\n";
00877
00878 expected = s1+s2+s3;
00879 capitalise(expected);
00880 cout << actual << actual.size() << endl << expected << expected.size() << endl;
00881
00882 assert(actual==expected);
00883
00884 W.clear();
00885
00886 }
00887 cout << "Test passed!" << endl << endl;
00888
00889
00890
00891 cout << "Test " << ++numTests <<": test of SequenceReaderStringProtein"
00892 << endl << endl;
00893
00894 {
00895
00896 string protString("THEQXICK*R*WNF***XMPS*VERTHELA*YD*G");
00897
00898 SequenceReaderStringProtein sr(protString,cout);
00899
00900 int wordLength(5);
00901 WordSequence W, W2;
00902 sr.getNextSequence(W,wordLength);
00903
00904 assert( sr.getNextSequence(W,wordLength) == -1 );
00905
00906 sr.rewind();
00907
00908 sr.getNextSequence(W2,wordLength);
00909
00910 assert( W==W2 );
00911
00912 string wordString;
00913
00914 cout << "expected: " << protString << endl;
00915 cout << "actual: " << printResidue(W,wordLength) << endl;
00916
00917
00918 for ( int i(0),j(0); i < protString.size() ; i+=wordLength,j++ )
00919 {
00920 wordString = protString.substr(i, wordLength);
00921 cout << wordString << " " << printResidue(W[j],wordLength) << endl;
00922 assert(W[j]==makeResidue(wordString));
00923 }
00924
00925
00926 cout << "Checking getBitsPerSymbol returns 5" << endl;
00927
00928 assert( sr.getBitsPerSymbol()==5 );
00929
00930 }
00931 cout << "Test passed!" << endl << endl;
00932
00933
00934
00935 cout << "Test " << ++numTests <<": test of SequenceEncoderProtein"
00936 << endl << endl;
00937
00938 {
00939
00940 int wordLength(5);
00941
00942 SequenceEncoderProtein encoder(wordLength);
00943
00944 cout << "Checking getBitsPerSymbol returns 5" << endl;
00945
00946 assert( encoder.getBitsPerSymbol()==5 );
00947
00948
00949 string s1="YNLHEPCSADKFWQMV", s2="GI*TRXnvqramyg", s3="pswhcdflxteik";
00950
00951 WordSequence W;
00952
00953 encoder.linkSeq(W);
00954 encoder.encode(s1);
00955 encoder.encode(s2);
00956 encoder.encode(s3);
00957 encoder.unlinkSeq();
00958 buffer << printResidue(W,wordLength) << ends;
00959 actual=buffer.str();
00960 buffer.freeze(false);
00961
00962 buffer.seekp(0,ios::beg);
00963
00964 cout << printResidue(W,wordLength) << "\n";
00965 cout << s1+s2+s3 << "\n";
00966
00967 expected = s1+s2+s3;
00968 capitalise(expected);
00969 assert(actual==expected);
00970
00971 W.clear();
00972
00973 }
00974
00975 cout << "Test passed!" << endl << endl;
00976
00977
00978
00979 cout << "Test " << ++numTests <<": test of codonize functions"
00980 << endl << endl;
00981
00982 {
00983 string allCodonsDNA ="taagcatgtgatgaatttggtcacataaaacttatgaatccccagcgttcaaccgtttggtat";
00984
00985 CodonList c1, c2;
00986 WordSequence w1,w2,w1Rev;
00987
00988 for (int k(0); k < 15; k++)
00989 {
00990 w1.clear(); w2.clear(); w1Rev.clear();
00991 SequenceReaderString reader(allCodonsDNA);
00992 reader.rewind();
00993 reader.getNextSequence(w1,gMaxBasesPerWord);
00994 reader.rewind();
00995 reader.getNextSequence(w2,gMaxBasesPerWord-1);
00996
00997 for (int i(0) ; i <gNumReadingFrames; i++ )
00998 {
00999 c1.clear(); c2.clear();
01000 codonize( w1, c1, i );
01001 codonizeAndFlag( w2, c2, i );
01002 cout << "c1: " << c1 << endl;
01003 cout << "c2: " << c2 << endl;
01004 assert (c1==c2);
01005 }
01006
01007 reverseComplement(w1, w1Rev, gMaxBasesPerWord);
01008
01009
01010 for (int i(0) ; i <gNumReadingFrames; i++ )
01011 {
01012 c1.clear(); c2.clear();
01013 codonize( w1Rev, c1, i );
01014 codonizeAndFlagReverse( w2, c2, i );
01015 cout << "c1: " << c1 << endl;
01016 cout << "c2: " << c2 << endl;
01017 assert (c1==c2);
01018 }
01019
01020 allCodonsDNA=allCodonsDNA.substr(0,allCodonsDNA.size()-1);
01021 }
01022
01023 }
01024
01025 cout << "Test passed!" << endl << endl;
01026
01027
01028
01029
01030 #ifdef SEQUENCE_READER_CODON_NO_LONGER_USED
01031 cout << "Test " << ++numTests <<": test of SequenceReaderCodon"
01032 << endl << endl;
01033
01034 {
01035
01036
01037 int wordLength(5);
01038 const string expectedCodons((string)gCodonNames);
01039 SequenceReaderStringProtein exp(expectedCodons);
01040 WordSequence expectedSeq;
01041 exp.getNextSequence( expectedSeq, wordLength );
01042 cout << printResidue( expectedSeq, wordLength ) << endl;
01043
01044
01045 string allPossibleCodons;
01046 string allAs("");
01047 for ( int i(0) ; i < 4 ; i++ )
01048 for ( int j(0) ; j < 4 ; j ++ )
01049 for ( int k(0) ; k < 4 ; k ++ )
01050 {
01051 allPossibleCodons += gBaseNames[i];
01052 allPossibleCodons += gBaseNames[j];
01053 allPossibleCodons += gBaseNames[k];
01054 }
01055
01056
01057 cout << allPossibleCodons << endl;
01058
01059 for ( int readFrame(0); readFrame < 6 ; readFrame ++ )
01060 {
01061 if (readFrame ==3) allAs="";
01062 string toBeCoded(allAs+allPossibleCodons);
01063 if (readFrame>=3) reverseString(toBeCoded);
01064
01065
01066 SequenceReaderString readDNA(toBeCoded,cout);
01067 SequenceReaderCodon readCodon(readDNA,cout);
01068
01069 assert( readCodon.getBitsPerSymbol()==5 );
01070
01071 WordSequence w;
01072 string name;
01073
01074 readDNA.getNextSequence( w, 12 );
01075 cout << printBase( w, 12 ) << endl;
01076 readDNA.rewind(); w.clear();
01077
01078 for ( int perm(0) ; perm < 6 ; perm++ )
01079 {
01080 readCodon.getNextSequence( w, wordLength );
01081 readCodon.getLastSequenceName(name);
01082 cout << name << endl;
01083 cout << printResidue( w, wordLength ) << endl;
01084 if (readFrame==perm)
01085 {
01086 cout << printResidue( expectedSeq, wordLength ) << endl;
01087 assert(w==expectedSeq);
01088 }
01089 }
01090
01091 allAs+="A";
01092 assert(readCodon.getNextSequence( w, wordLength )==-1);
01093
01094 }
01095
01096 cout << "testing SequenceEncoderCodon...\n";
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153 }
01154 cout << "Test passed!" << endl << endl;
01155 #endif
01156
01157
01158 cout << "Test " << ++numTests <<": test of SequenceReaderLocal"
01159 << endl << endl;
01160
01161 SequenceReaderLocal localReader(testReader,wordLength,cout);
01162
01163 assert ( localReader.getNumSequencesInFile()
01164 == testReader.getNumSequencesInFile() );
01165
01166 for ( int j(0) ; j < testReader.getNumSequencesInFile() ; j ++ )
01167 {
01168 wSingle.clear(); wMulti.clear();
01169
01170
01171
01172 assert( testReader.getNextSequence(wSingle, wordLength) ==
01173 localReader.getNextSequence(wMulti, wordLength) );
01174 testReader.getLastSequenceName(expected);
01175 testReader.getLastSequenceName(actual);
01176 cout << expected << " " << actual << endl;
01177 assert( expected == actual );
01178 cout << localReader.getLastSequenceNumber() << " "
01179 << testReader.getLastSequenceNumber() << endl;
01180
01181 assert( localReader.getLastSequenceNumber()
01182 == testReader.getLastSequenceNumber() );
01183 assert ( wSingle == wMulti );
01184 }
01185
01186 assert( localReader.getNextSequence(wMulti, wordLength)== -1 );
01187
01188
01189
01190
01191
01192
01193
01194 cout << "Test passed!" << endl << endl;
01195
01196
01197
01198 cout << "Test " << ++numTests <<": test of SequenceReaderFilter"
01199 << endl << endl;
01200
01201 {
01202
01203 SequenceReader* pFilteree = new SequenceReaderFastq("test_filter.fastq");
01204 SequenceReaderFilter filterer(pFilteree,"test_filter.fail");
01205
01206 cout << "Check details match original SeqReader..." << endl;
01207 assert(filterer.getBitsPerSymbol()==pFilteree->getBitsPerSymbol());
01208 assert(filterer.getSourceDataType()==pFilteree->getSourceDataType());
01209
01210 cout << "Check cycles through all sequences OK..." << endl;
01211 vector<WordSequence> seqs;
01212 vector<string> names;
01213
01214 seqs.push_back(WordSequence());
01215 names.push_back(string());
01216
01217 while (filterer.getNextSequence(seqs.back(),16)!=-1)
01218 {
01219 filterer.getLastSequenceName(names.back());
01220 cout << names.back() << endl << printWord(seqs.back(),16) << endl;
01221 seqs.push_back(WordSequence());
01222 names.push_back(string());
01223
01224 }
01225
01226 cout << "Read all sequences OK" << endl;
01227
01228 cout << "Check no sequences have been \"lost\"..." << endl;
01229 cout << pFilteree->getNumSequencesInFile()
01230 << " seqs in original file\n";
01231 cout << filterer.getNumSequencesInFile()
01232 << " seqs in filtered file\n";
01233 cout << filterer.getNumFiltered()
01234 << " filtered seqs\n";
01235
01236 assert( pFilteree->getNumSequencesInFile()
01237 == filterer.getNumSequencesInFile()
01238 + filterer.getNumFiltered() );
01239
01240
01241 cout
01242 << "Check output of getSequence matches output of getNextSequence..."
01243 << endl;
01244 WordSequence seq;
01245 string name;
01246
01247 for ( int i(1) ; i <= filterer.getNumSequencesInFile() ; i++ )
01248 {
01249 filterer.getSequence(seq, i, 16);
01250 filterer.getLastSequenceName(name);
01251 cout << name << endl << printWord(seq,16) << endl;
01252 assert(seq==seqs[i-1]);
01253 assert(name==names[i-1]);
01254 }
01255
01256 cout << "Check rewind works..." << endl;
01257 filterer.rewind();
01258
01259 assert (filterer.getNextSequence(seq,16)!=-1);
01260
01261 assert (seq==seqs[0]);
01262
01263
01264 assert( filterer.getSequence
01265 (seq, filterer.getNumSequencesInFile()+1, 16)
01266 == -1 );
01267 assert( filterer.getSequence(seq, 0, 16) == -1 );
01268
01269
01270
01271 }
01272 cout << "Test passed!" << endl << endl;
01273
01274
01275
01276 cout << "Test " << ++numTests
01277 <<": test of SourceReader functionality of SequenceReaderFile"
01278 << endl << endl;
01279
01280 {
01281 SequenceReaderFasta testReader("test_extract.fasta");
01282 SequenceReaderMulti testReaderMulti(cout);
01283 testReaderMulti.addReader(testReader.clone());
01284
01285 testReader.saveIndex("test_index1");
01286 testReaderMulti.saveIndex("test_index2");
01287
01288
01289 SourceReaderIndex testIndexReader("test_index2");
01290
01291
01292 string data=
01293 "tggtaaaaaaggttttcctgcatattaatatcatggtataggtgattgctgatggtatgtctttcttttactagatcatgcactgttcaggagcagaaaaatgtggctgttttttccaccacatattgtcctactactgtgtaccctgtctcagtaaatgccagtcagttacccaagctaaatgtccttgattcttccattctttcactacttccctggttcacattccagttggccaccaggtcatacttattttatcttaaaaattctctctctgaaatgcaaatcaaaactacgatgagataccatctcaaaccagttagtttgagatggcagttattaaaaagttcaaacataacagatgctgcgaggttgtggagaaaagaaaacacttatgcactgttggtgggagtgtaaattaattcaaccattgtggaaagtagtgtggtgatttctcaaagagctgaaagcagaactaccaatcaatccagccattccattactg-gtatatacccagaggaatataaattgttctatcataaagacacatgcataggtgtgtccattgtatcactactta-catagcaaagacatgaaatcaaccta-atgcccactggtgatagac";
01294
01295 int seqSize=data.size();
01296
01297 cout << seqSize << endl;
01298
01299
01300 char* v1;
01301 char* v2;
01302 char* v1m;
01303 char* v2m;
01304 char* v1i;
01305 char* v2i;
01306
01307 string n1,n2;
01308
01309 for (int j(1) ; j <= seqSize ; j++)
01310 {
01311 cout << "j: " << j << endl;
01312
01313
01314
01315
01316 testReader.extractSource(&v1, 1, j, seqSize );
01317 testReader.extractSource(&v2, 2, j, seqSize );
01318
01319 testReaderMulti.extractSource(&v1m, 1, j, seqSize );
01320 testReaderMulti.extractSource(&v2m, 2, j, seqSize );
01321
01322 testIndexReader.extractSource(&v1i, 1, j, seqSize );
01323 testIndexReader.extractSource(&v2i, 2, j, seqSize );
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333 for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v1[i-j]==data[i-1]);
01334 for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2[i-j]==data[i-1]);
01335 for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v1m[i-j]==data[i-1]);
01336 for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2m[i-j]==data[i-1]);
01337 for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v1i[i-j]==data[i-1]);
01338 for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2i[i-j]==data[i-1]);
01339
01340 }
01341 cout << "got through first bit" << endl;
01342
01343 for (int j(1) ; j <= seqSize ; j++)
01344 {
01345
01346
01347
01348
01349 testReader.extractSource(&v1, 1, 1, j );
01350 testReader.extractSource(&v2, 2, 1, j );
01351
01352 testReaderMulti.extractSource(&v1m, 1, 1, j );
01353 testReaderMulti.extractSource(&v2m, 2, 1, j );
01354
01355 testIndexReader.extractSource(&v1i, 1, 1, j );
01356 testIndexReader.extractSource(&v2i, 2, 1, j );
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366 for ( int i(1) ; i < j ; i++ ) assert(v1[i]==data[i]);
01367 for ( int i(1) ; i < j ; i++ ) assert(v2[i]==data[i]);
01368 for ( int i(1) ; i < j ; i++ ) assert(v1m[i]==data[i]);
01369 for ( int i(1) ; i < j ; i++ ) assert(v2m[i]==data[i]);
01370 for ( int i(1) ; i < j ; i++ ) assert(v1i[i]==data[i]);
01371 for ( int i(1) ; i < j ; i++ ) assert(v2i[i]==data[i]);
01372
01373 }
01374 cout << "got through second bit" << endl;
01375
01376
01377
01378 }
01379 cout << "Test passed!" << endl << endl;
01380
01381
01382
01383 cout << "Test " << ++numTests
01384 <<": test that SourceReader functionality works with getNextSequence"
01385 << endl << endl;
01386
01387 {
01388 string data=
01389 "tggtaaaaaaggttttcctgcatattaatatcatggtataggtgattgctgatggtatgtctttcttttactagatcatgcactgttcaggagcagaaaaatgtggctgttttttccaccacatattgtcctactactgtgtaccctgtctcagtaaatgccagtcagttacccaagctaaatgtccttgattcttccattctttcactacttccctggttcacattccagttggccaccaggtcatacttattttatcttaaaaattctctctctgaaatgcaaatcaaaactacgatgagataccatctcaaaccagttagtttgagatggcagttattaaaaagttcaaacataacagatgctgcgaggttgtggagaaaagaaaacacttatgcactgttggtgggagtgtaaattaattcaaccattgtggaaagtagtgtggtgatttctcaaagagctgaaagcagaactaccaatcaatccagccattccattactg-gtatatacccagaggaatataaattgttctatcataaagacacatgcataggtgtgtccattgtatcactactta-catagcaaagacatgaaatcaaccta-atgcccactggtgatagac";
01390
01391 SequenceReaderFasta testReader("test_extract.fasta");
01392 SequenceReaderMulti testReaderMulti(cout);
01393 testReaderMulti.addReader(testReader.clone());
01394 SourceReaderIndex testIndexReader("test_index2");
01395
01396 WordSequence w1, w2;
01397
01398 char* v1; char* v2;
01399
01400 int numSeqs(4);
01401 int numChars(100);
01402
01403 string n1,n2;
01404
01405 for (int i(0);i<numSeqs; i++)
01406 {
01407 cout << "i: " << i << endl;
01408 w1.clear(); w2.clear();
01409
01410 assert(testReader.getLastSequenceNumber()==i);
01411 assert(testReaderMulti.getLastSequenceNumber()==i);
01412
01413 assert(testReader.findSequence(i+1)==true);
01414 assert(testReaderMulti.findSequence(i+1)==true);
01415
01416 assert(testReader.getLastSequenceNumber()==i);
01417 assert(testReaderMulti.getLastSequenceNumber()==i);
01418
01419
01420 assert(testReader.getNextSequence( w1, 10 )!=-1);
01421 assert(testReaderMulti.getNextSequence( w2, 10 )!=-1);
01422 assert(w1==w2);
01423
01424 assert(testReader.getLastSequenceNumber()==i+1);
01425 assert(testReaderMulti.getLastSequenceNumber()==i+1);
01426
01427
01428 testReader.getLastSequenceName(n1);
01429 testReaderMulti.getLastSequenceName(n2);
01430 assert(n1==n2);
01431 cout << n1 << " " << n1.size() << endl;
01432 cout << testIndexReader.extractName(i+1) << endl;
01433 n2=(string)testIndexReader.extractName(i+1);
01434 cout << n2 << " " << n2.size() << endl;
01435 assert(n1==n2);
01436
01437 for ( int j(1); j <= numSeqs ; j++ )
01438 {
01439 cout << "j: " << j << endl;
01440
01441 testReader.extractSource(&v1, j, 1, numChars );
01442 testReaderMulti.extractSource(&v2, j, 1, numChars );
01443
01444
01445
01446
01447
01448 assert(testReader.getLastSequenceNumber()==i+1);
01449 assert(testReaderMulti.getLastSequenceNumber()==i+1);
01450
01451 for ( int k(0) ; k < numChars ; k++ ) assert(v1[k]==data[k]);
01452 for ( int k(0) ; k < numChars ; k++ ) assert(v2[k]==data[k]);
01453
01454
01455 }
01456
01457 }
01458
01459
01460
01461
01462
01463
01464
01465 }
01466 cout << "Test passed!" << endl << endl;
01467
01468
01469
01470
01471
01472
01473
01474 return (0);
01475
01476 }
01477 catch (const SSAHAException& err )
01478 {
01479 cout << "Caught SSAHA exception: " << err.what() << "\n";
01480 exit(1);
01481 }
01482 catch (const std::exception& err )
01483 {
01484 cout << "Caught exception: " << err.what() << "\n";
01485 exit(1);
01486 }
01487
01488
01489 }
01490
01491
01492
01493
01494
01495
01496
01497
01498
01499
01500