SequenceReader/testSequenceReaderFasta.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : testSequenceReaderFasta
00025 // File Name    : testSequenceReaderFasta.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 #include <exception>
00034 #include "SequenceEncoder.h"
00035 #include "SequenceReader.h"
00036 #include "SequenceReaderFasta.h"
00037 #include "SequenceReaderFastq.h"
00038 #include "SequenceReaderString.h"
00039 #include "SequenceReaderMulti.h"
00040 #include "SequenceReaderLocal.h"
00041 #include "SequenceReaderFilter.h"
00042 //#include "SequenceReaderCodon.h"
00043 #include "GenerateTestFastaFiles.h"
00044 #include "assert.h"
00045 #include <string>
00046 #include <strstream>
00047 #include <iostream>
00048 
00049 // ### Function Definitions ###
00050 void capitalise( string& s )
00051 {
00052   for ( int i(0) ; i < s.size() ; ++i ) s[i] = toupper(s[i]);
00053 }
00054 void reverseString( string& seq )
00055 {
00056   string rc;
00057   for ( int i(0) ; i < seq.size() ; i++ )
00058   {
00059     if  ( ( seq[i] == 'A' ) || (seq[i] == 'a') ) rc = 'T' + rc; 
00060     else if ( ( seq[i] == 'T' ) || (seq[i] == 't') ) rc = 'A' + rc; 
00061     else if ( ( seq[i] == 'G' ) || (seq[i] == 'g') ) rc = 'C' + rc; 
00062     else if ( ( seq[i] == 'C' ) || (seq[i] == 'c') ) rc = 'G' + rc; 
00063     else throw SSAHAException();
00064   } // ~for i
00065 
00066   seq = rc;
00067 
00068 } // ~reverseString
00069 
00070 
00071 
00072 
00073 int main( void )
00074 {
00075   try
00076   {
00077 
00078   int numTests = 0;
00079 
00080   cout << "*******************************************" << endl << endl;
00081   cout << "Test of functions in SequenceReader modules" << endl << endl;
00082   cout << "*******************************************" << endl << endl;
00083 
00084   const int wordLength(10);
00085   std::ostrstream buffer;
00086   string s1,s2,s3,s4,s5;
00087 
00088   // ---
00089 
00090   cout << " --- Tests of member functions of SequenceReaderFasta ---\n\n";
00091   cout << "Test " << ++numTests <<": test of function getNextSequence" 
00092        << endl << endl;
00093 
00094   int numSeqs = 10;
00095   int seqSize = 57;
00096 
00097   // Generate a random sequence of (numSeqs*seqSize) base pairs ...
00098   BaseGenerator testBases( numSeqs * seqSize );
00099 
00100   // ... make a fasta file from this sequence, breaking the sequence into
00101   // sequences of seqSize base pairs, with no overlaps (i.e. numSeqs of them)
00102   // Haven't specified a file name, so will default to test_subject.fasta 
00103   testBases.generateSubjectFile(seqSize,0);
00104 
00105   SequenceReaderFasta testReader("test_subject.fasta",cout);
00106   WordSequence w;
00107   string expected, actual, name;
00108 
00109   // last Word of decoded WordSequence will not be full, and the blank spaces
00110   // will be decoded by printWord as 'A's. spareBases is a string of 'A's to
00111   // add on the end of the expected sequence string for it to match actual.
00112   //  const string spareBases( wordLength - ( seqSize % wordLength ), 'A');
00113 
00114   // TC 08.03.01 - improved printWord so it takes account of the number of
00115   // bases in the last word of a WordSequence (by looking at numBasesInLast)
00116   // Therefore above step no longer necessary.
00117   const string spareBases("");
00118 
00119 
00120 
00121   for ( int i(0) ; i < numSeqs ; i++ )
00122   {
00123   
00124     testReader.getNextSequence(w,wordLength);
00125     buffer << printWord(w,wordLength) << ends;
00126     actual=buffer.str();
00127     buffer.freeze(false); // release memory to write further stuff to buffer  
00128     buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00129  
00130     testBases.getBases(i*seqSize,seqSize,expected);
00131 
00132     expected += spareBases;
00133 
00134     testReader.getLastSequenceName(name);
00135     cout << "SequenceName     : " << name << endl;
00136     cout << "Expected sequence: " << expected << endl;
00137     cout << "Actual sequence  : " << actual   << endl;
00138 
00139     assert(expected == actual);
00140 
00141     w.clear();
00142 
00143   } // ~for
00144     
00145     cout << "Test passed!" << endl << endl; 
00146 
00147   // ---
00148 
00149   cout << "Test " << ++numTests <<": test of sequence numbering" 
00150        << endl << endl;
00151 
00152 
00153   cout << "Last sequence number read: " << testReader.getLastSequenceNumber()
00154        << endl;
00155 
00156   assert( testReader.getLastSequenceNumber() == numSeqs );
00157 
00158   int numInLast = testReader.getNextSequence( w, wordLength );
00159 
00160   cout << "getNextSequence returned " << numInLast 
00161        << ": should be -1 because all sequences have been read." << endl;
00162 
00163   assert( numInLast == -1 );
00164 
00165   cout << "Number of sequences in file: " << testReader.getNumSequencesInFile()
00166        << endl;
00167 
00168   assert( testReader.getNumSequencesInFile() == numSeqs );
00169 
00170   cout << "Rewinding to start of data ..." << endl;
00171 
00172   testReader.rewind();
00173 
00174   cout << "Last sequence number read: " << testReader.getLastSequenceNumber()
00175        << endl;
00176 
00177   assert( testReader.getLastSequenceNumber() == 0 );
00178  
00179   cout << "Checking getBitsPerSymbol returns 2" << endl;
00180 
00181   assert( testReader.getBitsPerSymbol()==2 );
00182 
00183   cout << "Test passed!" << endl << endl; 
00184 
00185 
00186   // ---
00187 
00188   cout << "Test " << ++numTests <<": test of getSequence" 
00189        << endl << endl;
00190 
00191     int toRead = 5;
00192 
00193     w.clear();
00194     testReader.getSequence(w,toRead,wordLength);
00195     buffer << printWord(w,wordLength) << ends;
00196     actual=buffer.str();
00197     buffer.freeze(false); // release memory to write further stuff to buffer  
00198     buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00199  
00200     testBases.getBases((toRead-1)*seqSize,seqSize,expected);
00201 
00202     expected += spareBases;
00203 
00204     cout << "Expected sequence: " << expected << endl;
00205     cout << "Actual sequence  : " << actual   << endl;
00206 
00207     assert(expected == actual);
00208 
00209     cout << toRead << " " << testReader.getLastSequenceNumber() << endl;
00210     assert(testReader.getLastSequenceNumber() == toRead );    
00211 
00212     w.clear();
00213 
00214 
00215     //    cout << 
00216     //   "Passing invalid sequence num to getSequence, should throw exception...\n";
00217 
00218     //    try
00219     //   {
00220     //    numInLast = testReader.getSequence( w, 987, wordLength );
00221     //    cout << "Error: exception should have been thrown!\n";
00222     //     exit(1);
00223     //   } // ~try
00224     //   catch (const SSAHAException& err )
00225     //   {
00226     //     cout << "Caught SSAHA exception: " << err.what() << "\n";
00227     //   }
00228 
00229     cout << "Passing invalid sequence number to getSequence ..." << endl;
00230 
00231     numInLast = testReader.getSequence( w, 987, wordLength );
00232 
00233     cout << "getNextSequence returned " << numInLast 
00234          << ": should be -1 because sequence number is invalid." << endl;
00235 
00236     assert( numInLast == -1 );
00237 
00238     cout << "Test passed!" << endl << endl; 
00239 
00240     // ---
00241 
00242     cout << "Test " << ++numTests <<": test of random access output functions" 
00243        << endl << endl;
00244 
00245     for ( int i(1) ; i <= numSeqs; i++ )
00246     {
00247       cout << testReader.getName(i) << endl;
00248       cout << testReader.getSideInfo(i) << endl;
00249       cout << testReader.getSource(i) << endl;
00250     } // ~for i
00251 
00252   // ---
00253 
00254   cout << " --- Tests of member functions of SequenceReaderFastq ---\n\n";
00255   cout << "Test " << ++numTests <<": test of function getNextSequence" 
00256        << endl << endl;
00257 
00258   //  int numSeqs = 10;
00259   // int seqSize = 57;
00260 
00261   // Generate a random sequence of (numSeqs*seqSize) base pairs ...
00262   //  BaseGenerator testBases( numSeqs * seqSize );
00263 
00264   // ... make a fasta file from this sequence, breaking the sequence into
00265   // sequences of seqSize base pairs, with no overlaps (i.e. numSeqs of them)
00266   // Haven't specified a file name, so will default to test_subject.fasta 
00267   testBases.generateSubjectFileFastq(seqSize,0);
00268 
00269   SequenceReaderFastq testReaderFastq("test_subject.fastq",cout);
00270   //  WordSequence w;
00271   //  string expected, actual, name;
00272 
00273   // last Word of decoded WordSequence will not be full, and the blank spaces
00274   // will be decoded by printWord as 'A's. spareBases is a string of 'A's to
00275   // add on the end of the expected sequence string for it to match actual.
00276   //  const string spareBases( wordLength - ( seqSize % wordLength ), 'A');
00277 
00278   for ( int i(0) ; i < numSeqs ; i++ )
00279   {
00280   
00281     testReaderFastq.getNextSequence(w,wordLength);
00282     buffer << printWord(w,wordLength) << ends;
00283     actual=buffer.str();
00284     buffer.freeze(false); // release memory to write further stuff to buffer  
00285     buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00286  
00287     testBases.getBases(i*seqSize,seqSize,expected);
00288 
00289     expected += spareBases;
00290 
00291     testReaderFastq.getLastSequenceName(name);
00292     cout << "SequenceName     : " << name << endl;
00293     cout << "Expected sequence: " << expected << endl;
00294     cout << "Actual sequence  : " << actual   << endl;
00295 
00296     assert(expected == actual);
00297 
00298     w.clear();
00299 
00300   } // ~for
00301     
00302     cout << "Test passed!" << endl << endl; 
00303 
00304   // ---
00305 
00306   cout << "Test " << ++numTests <<": test of sequence numbering" 
00307        << endl << endl;
00308 
00309 
00310   cout << "Last sequence number read: " << testReaderFastq.getLastSequenceNumber()
00311        << endl;
00312 
00313   assert( testReaderFastq.getLastSequenceNumber() == numSeqs );
00314 
00315   numInLast = testReaderFastq.getNextSequence( w, wordLength );
00316 
00317   cout << "getNextSequence returned " << numInLast 
00318        << ": should be -1 because all sequences have been read." << endl;
00319 
00320   assert( numInLast == -1 );
00321 
00322   cout << "Number of sequences in file: " << testReaderFastq.getNumSequencesInFile()
00323        << endl;
00324 
00325   assert( testReaderFastq.getNumSequencesInFile() == numSeqs );
00326 
00327   cout << "Rewinding to start of data ..." << endl;
00328 
00329   testReaderFastq.rewind();
00330 
00331   cout << "Last sequence number read: " << testReaderFastq.getLastSequenceNumber()
00332        << endl;
00333 
00334   assert( testReaderFastq.getLastSequenceNumber() == 0 );
00335  
00336   cout << "Test passed!" << endl << endl; 
00337 
00338   cout << "Checking getBitsPerSymbol returns 2" << endl;
00339 
00340   assert( testReader.getBitsPerSymbol()==2 );
00341 
00342   // ---
00343 
00344   cout << "Test " << ++numTests <<": test of getSequence" 
00345        << endl << endl;
00346 
00347     toRead = 5;
00348 
00349     w.clear();
00350     testReaderFastq.getSequence(w,toRead,wordLength);
00351     buffer << printWord(w,wordLength) << ends;
00352     actual=buffer.str();
00353     buffer.freeze(false); // release memory to write further stuff to buffer  
00354     buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00355  
00356     testBases.getBases((toRead-1)*seqSize,seqSize,expected);
00357 
00358     expected += spareBases;
00359 
00360     cout << "Expected sequence: " << expected << endl;
00361     cout << "Actual sequence  : " << actual   << endl;
00362 
00363 
00364     assert(expected == actual);
00365 
00366     cout << toRead << " " << testReaderFastq.getLastSequenceNumber() << endl;
00367     assert(testReaderFastq.getLastSequenceNumber() == toRead );    
00368 
00369     w.clear();
00370 
00371     //    cout << 
00372     //   "Passing invalid sequence num to getSequence, should throw exception...\n";
00373 
00374     //    try
00375     //    {
00376     //    numInLast = testReaderFastq.getSequence( w, 987, wordLength );
00377     //     cout << "Error: exception should have been thrown!\n";
00378     //     exit(1);
00379     //   } // ~try
00380     //   catch (const SSAHAException& err )
00381     //   {
00382     //     cout << "Caught SSAHA exception: " << err.what() << "\n";
00383     //   }
00384 
00385     //    cout << "getNextSequence returned " << numInLast 
00386     //        << ": should be -1 because sequence number is invalid." << endl;
00387 
00388     //    assert( numInLast == -1 );
00389 
00390     cout << "Passing invalid sequence number to getSequence ..." << endl;
00391 
00392     numInLast = testReaderFastq.getSequence( w, 987, wordLength );
00393 
00394     cout << "getNextSequence returned " << numInLast 
00395          << ": should be -1 because sequence number is invalid." << endl;
00396 
00397     assert( numInLast == -1 );
00398 
00399     cout << "Test passed!" << endl << endl; 
00400 
00401     // ---
00402 
00403     cout << "Test " << ++numTests <<": test of random access output functions" 
00404        << endl << endl;
00405 
00406     for ( int i(1) ; i <= numSeqs; i++ )
00407     {
00408       cout << testReaderFastq.getName(i) << endl;
00409       cout << testReaderFastq.getSideInfo(i) << endl;
00410       cout << testReaderFastq.getSource(i) << endl;
00411     } // ~for i
00412 
00413     // ---
00414 
00415     cout << "Test " << ++numTests <<": test of SequenceReaderString" 
00416          << endl << endl;
00417     //        12345678901234567890123456789012345678901234567890
00418     expected="ATGCCTGCGTATGTAATGCTGGCATTGGTGCGTTTAATGCTCTCCTGATG";
00419     SequenceReaderString stringReader(expected,cout);
00420     expected+="AAAAAAAAAA"; //%%%
00421 
00422     assert(stringReader.getNumSequencesInFile()==1);
00423 
00424     w.clear();
00425     numInLast = stringReader.getNextSequence(w,wordLength);
00426     buffer << printWord(w,wordLength) << ends;
00427     actual=buffer.str();
00428     buffer.freeze(false); // release memory to write further stuff to buffer  
00429     buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00430     
00431     cout << "expected: " << expected << endl;
00432     cout << "actual:   " << actual << " " << w.size() << " " 
00433          << w.getNumBasesInLast() << endl;
00434 
00435 
00436     assert( numInLast == 0 );
00437     assert( expected == actual );    
00438 
00439     cout << "Checking getBitsPerSymbol returns 2" << endl;
00440 
00441     assert( stringReader.getBitsPerSymbol()==2 );
00442 
00443 
00444 
00445     cout << "Test passed!" << endl << endl; 
00446 
00447     // ---
00448 
00449     cout << "Test " << ++numTests <<": test of SequenceReaderMulti" 
00450          << endl << endl;
00451 
00452     SequenceReaderMulti multiReader(cout);
00453 
00454     int numReaders = 4;
00455     
00456     for ( int i(0) ; i < numReaders ; i ++ ) 
00457       multiReader.addReader(testReader.clone());
00458 
00459     assert( multiReader.getNumSequencesInFile() == 
00460             ( numReaders * testReader.getNumSequencesInFile() ) );
00461 
00462     WordSequence wSingle, wMulti;
00463     multiReader.rewind();
00464 
00465     for ( int i(0) ; i < numReaders ; i ++ ) 
00466     {
00467 
00468       testReader.rewind();
00469       for ( int j(0) ; j < testReader.getNumSequencesInFile() ; j ++ ) 
00470       {
00471                 cout << i << " " << j << endl;
00472         wSingle.clear(); wMulti.clear(); 
00473         assert( multiReader.getBitsPerSymbol()==2 );
00474         assert( testReader.getNextSequence(wSingle, wordLength) ==  
00475                 multiReader.getNextSequence(wMulti, wordLength) );
00476         //         cout << multiReader.getLastSequenceNumber() << " " 
00477         //              << testReader.getLastSequenceNumber() << endl;
00478         testReader.getLastSequenceName(expected);
00479         multiReader.getLastSequenceName(actual);
00480         cout << expected << " " << actual << endl;
00481         assert( expected == actual );
00482 
00483         assert( multiReader.getLastSequenceNumber()
00484                 == ( i*testReader.getNumSequencesInFile() )
00485                    + testReader.getLastSequenceNumber() );
00486         assert ( wSingle == wMulti );
00487       } // ~for j
00488 
00489     } // ~for i
00490 
00491     wMulti.clear();
00492    
00493     cout << multiReader.getLastSequenceNumber()  << endl;
00494 
00495     assert( multiReader.getLastSequenceNumber() == 
00496             ( numReaders * testReader.getNumSequencesInFile() ) );
00497     assert( multiReader.getNextSequence( wMulti,wordLength ) == -1 );
00498 
00499     multiReader.rewind();
00500 
00501     assert( multiReader.getLastSequenceNumber() == 0 );   
00502 
00503     cout << "Checking getBitsPerSymbol returns 2" << endl;
00504 
00505     assert( multiReader.getBitsPerSymbol()==2 );
00506 
00507     cout << "Test passed!" << endl << endl; 
00508 
00509     // ---
00510 
00511     cout << "Test " << ++numTests <<": test of make/printBase/Residue" 
00512          << endl << endl;
00513 
00514     {
00515       Word w1;
00516       string s1;
00517 
00518       s1="TaCgTTgtAGc"; 
00519       w1=makeBase(s1);
00520       buffer << printBase(w1,11) << ends;
00521       actual=buffer.str();
00522       buffer.freeze(false); 
00523       // release memory to write further stuff to buffer  
00524       buffer.seekp(0,ios::beg); 
00525       // ensure next write is to start of buffer
00526       cout << actual << "\n";
00527       cout << s1 << "\n";
00528       capitalise(s1);
00529       assert( actual == s1 );
00530 
00531       s1="ACDEFG"; 
00532       w1=makeResidue(s1);
00533       buffer << printResidue(w1,6) << ends;
00534       actual=buffer.str();
00535       buffer.freeze(false); 
00536       // release memory to write further stuff to buffer  
00537       buffer.seekp(0,ios::beg); 
00538       // ensure next write is to start of buffer
00539       cout << actual << "\n";
00540       cout << s1 << "\n";
00541       capitalise(s1);
00542       assert( actual == s1 );
00543 
00544       s1="NMLKIH"; 
00545       w1=makeResidue(s1);
00546       buffer << printResidue(w1,6) << ends;
00547       actual=buffer.str();
00548       buffer.freeze(false); 
00549       // release memory to write further stuff to buffer  
00550       buffer.seekp(0,ios::beg); 
00551       // ensure next write is to start of buffer
00552       cout << actual << "\n";
00553       cout << s1 << "\n";
00554       capitalise(s1);
00555       assert( actual == s1 );
00556 
00557       //      s1="PRTQSU"; 
00558       s1="PRTQSX"; 
00559       w1=makeResidue(s1);
00560       buffer << printResidue(w1,6) << ends;
00561       actual=buffer.str();
00562       buffer.freeze(false); 
00563       // release memory to write further stuff to buffer  
00564       buffer.seekp(0,ios::beg); 
00565       // ensure next write is to start of buffer
00566       cout << actual << "\n";
00567       cout << s1 << "\n";
00568       capitalise(s1);
00569       assert( actual == s1 );
00570 
00571       s1="VWY***"; 
00572       w1=makeResidue(s1);
00573       buffer << printResidue(w1,6) << ends;
00574       actual=buffer.str();
00575       buffer.freeze(false); 
00576       // release memory to write further stuff to buffer  
00577       buffer.seekp(0,ios::beg); 
00578       // ensure next write is to start of buffer
00579       cout << actual << "\n";
00580       cout << s1 << "\n";
00581       capitalise(s1);
00582       assert( actual == s1 );
00583 
00584       s1="acdefg"; 
00585       w1=makeResidue(s1);
00586       buffer << printResidue(w1,6) << ends;
00587       actual=buffer.str();
00588       buffer.freeze(false); 
00589       // release memory to write further stuff to buffer  
00590       buffer.seekp(0,ios::beg); 
00591       // ensure next write is to start of buffer
00592       cout << actual << "\n";
00593       cout << s1 << "\n";
00594       capitalise(s1);
00595       assert( actual == s1 );
00596 
00597       s1="nmlkih"; 
00598       w1=makeResidue(s1);
00599       buffer << printResidue(w1,6) << ends;
00600       actual=buffer.str();
00601       buffer.freeze(false); 
00602       // release memory to write further stuff to buffer  
00603       buffer.seekp(0,ios::beg); 
00604       // ensure next write is to start of buffer
00605       cout << actual << "\n";
00606       cout << s1 << "\n";
00607       capitalise(s1);
00608       assert( actual == s1 );
00609 
00610       //      s1="prtqsu"; 
00611       s1="prtqsx"; 
00612       w1=makeResidue(s1);
00613       buffer << printResidue(w1,6) << ends;
00614       actual=buffer.str();
00615       buffer.freeze(false); 
00616       // release memory to write further stuff to buffer  
00617       buffer.seekp(0,ios::beg); 
00618       // ensure next write is to start of buffer
00619       cout << actual << "\n";
00620       cout << s1 << "\n";
00621       capitalise(s1);
00622       assert( actual == s1 );
00623 
00624       s1="vwy***"; 
00625       w1=makeResidue(s1);
00626       buffer << printResidue(w1,6) << ends;
00627       actual=buffer.str();
00628       buffer.freeze(false); 
00629       // release memory to write further stuff to buffer  
00630       buffer.seekp(0,ios::beg); 
00631       // ensure next write is to start of buffer
00632       cout << actual << "\n";
00633       cout << s1 << "\n";
00634       capitalise(s1);
00635       assert( actual == s1 );
00636 
00637 
00638     }
00639     cout << "Test passed!" << endl << endl; 
00640 
00641 
00642     // ---
00643 
00644     cout << "Test " << ++numTests <<": test of SequenceEncoder"
00645          << endl << endl;
00646     {
00647       WordSequence W;
00648       SequenceEncoderDNA encoder(12);
00649 
00650       cout << "Checking getBitsPerSymbol returns 2" << endl;
00651 
00652       assert( encoder.getBitsPerSymbol()==2 );
00653 
00654       cout << "Checking mode changes...\n";
00655 
00656       string s1="nOne@rEv@lID";
00657 
00658       cout << "... by default, should ignore invalid characters\n";
00659       encoder.linkSeq(W);
00660       encoder.encode(s1);
00661       encoder.unlinkSeq();
00662       cout << W.size() << " " << W[0] << endl;
00663       // puts a single word into seq, but does nothing to it
00664       assert(W.size()==1);
00665       assert(W[0]==0);
00666 
00667       cout << "...changing mode to replace invalid chars with 'G'\n";
00668       SequenceReaderModeReportReplace mode1('G');
00669       //      encoder.changeMode( new SequenceReaderModeReportReplace('G') );
00670       encoder.changeMode( &mode1 );
00671       encoder.linkSeq(W);
00672       encoder.encode(s1);
00673       encoder.unlinkSeq();
00674       cout << W.size() << " " << printBase(W[0],12) << endl;
00675       assert(W.size()==2);
00676       assert(W[0]==makeBase("GGGGGGGGGGGG"));
00677 
00678       cout << "...checking mode changes are passed when copied\n";
00679       W.clear();
00680       SequenceEncoder copy(encoder);
00681       // shouldn't have to link, should be linked to same as encoder
00682       copy.linkSeq(W);
00683       copy.encode(s1);
00684       copy.unlinkSeq();
00685       cout << W.size() << " " << printBase(W[0],12) << endl;
00686       assert(W.size()==2);
00687       assert(W[0]==makeBase("GGGGGGGGGGGG"));
00688 
00689       cout << "...changing copy to 'ignore' mode\n";
00690       W.clear();
00691       SequenceReaderModeIgnore mode2;
00692       //      copy.changeMode( new SequenceReaderModeIgnore );
00693       copy.changeMode( &mode2 );
00694       copy.linkSeq(W);
00695       copy.encode(s1);
00696       copy.unlinkSeq();
00697       // puts a single word into seq, but does nothing to it
00698       assert(W.size()==1);
00699       assert(W[0]==0);
00700 
00701     
00702              
00703       
00704     }
00705     
00706     cout << "Test passed!" << endl << endl; 
00707 
00708 
00709     // ---
00710 
00711     cout << "Test " << ++numTests <<": test of SequenceEncoderDNA" 
00712          << endl << endl;
00713 
00714     {
00715       //  cout << "for ttDNA\n";
00716       //  print(ttDNA);
00717       //  cout << "for ttProtein\n";
00718       //  print(ttProtein);
00719       //  cout << "for ttCodon\n";
00720       //  print(ttCodon);
00721 
00722       int wordLength(12);
00723 
00724       SequenceEncoderDNA encoder(wordLength);
00725 
00726       string s1="AGCcGTGGGTTTG", s2="TTGgGTTGGGGAC", s3="AGCTGaTTTGCAAGtCAA";
00727 
00728       WordSequence W;
00729 
00730       encoder.linkSeq(W);
00731       encoder.encode(s1);
00732       encoder.encode(s2);
00733       encoder.encode(s3);
00734       encoder.unlinkSeq();
00735       buffer << printBase(W,wordLength) << ends;
00736       actual=buffer.str();
00737       buffer.freeze(false); 
00738       // release memory to write further stuff to buffer  
00739       buffer.seekp(0,ios::beg); 
00740       // ensure next write is to start of buffer
00741       cout << printBase(W,wordLength) << "\n";
00742       cout << s1+s2+s3 << "\n";
00743 
00744       expected = s1+s2+s3;
00745       capitalise(expected);
00746       cout << actual << actual.size() << endl << expected << expected.size() << endl;
00747 
00748       assert(actual==expected);
00749 
00750       W.clear();
00751 
00752     }
00753     cout << "Test passed!" << endl << endl; 
00754 
00755     // ---
00756 
00757     cout << "Test " << ++numTests <<": additional tests of SequenceEncoderDNA" 
00758          << endl << endl;
00759 
00760     {
00761 
00762       string fiftyAs="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
00763       string thisString;
00764       WordSequence w;
00765       int numInLast;
00766       SequenceReaderModeIgnore ignore;
00767       SequenceReaderModeReplace replace('T');      
00768       SequenceReaderModeFlagReplace tag('T');
00769       int dudPos; 
00770       Word dudValue;
00771 
00772       
00773       for (int wordLength(9); wordLength <= 12; wordLength++)
00774       {
00775         for (int i(0) ; i < fiftyAs.size(); i++)
00776         {
00777           thisString=fiftyAs;
00778           thisString[i]='X';
00779           w.clear();
00780           dudPos=i/wordLength;
00781           dudValue=3<<(2*(wordLength-(i%wordLength)-1));
00782           cout << "dud " << wordLength << " " << i << " " 
00783                << dudPos << " " << printWord(dudValue, wordLength) << endl;
00784 
00785           SequenceReaderString stringReader(thisString,cout);
00786 
00787 
00788           assert(stringReader.getNumSequencesInFile()==1);
00789 
00790           // ignore mode
00791           cout << "testing ignore mode" << endl;
00792 
00793           stringReader.changeMode(&ignore);
00794           w.clear();
00795           numInLast = stringReader.getNextSequence(w,wordLength);
00796           assert(numInLast==(fiftyAs.size()-1)%wordLength);
00797           assert((w.size()-1)*wordLength+numInLast==fiftyAs.size()-1);
00798 
00799           for (WordSequence::iterator j(w.begin()); j!=w.end();j++)
00800           {
00801             assert (*j==0);
00802           }
00803 
00804           // replace mode
00805           cout << "testing replace mode" << endl;
00806 
00807           stringReader.rewind();
00808           stringReader.changeMode(&replace);
00809           w.clear();
00810           numInLast = stringReader.getNextSequence(w,wordLength);
00811           cout << "blegh " << wordLength << " " << i  << " " << fiftyAs.size() << " " << numInLast << " " << w.size() << endl;
00812           
00813           assert(numInLast==fiftyAs.size()%wordLength);
00814           assert((w.size()-1)*wordLength+numInLast==fiftyAs.size());
00815 
00816           for (WordSequence::iterator j(w.begin()); j!=w.end();j++)
00817           {
00818             cout << wordLength << " " << i << " " << (j-w.begin()) << " "
00819                  << *j << " " << gCursedWord << endl;
00820             if (j-w.begin()==dudPos) assert (*j==dudValue);
00821             else assert (*j==0);
00822           }
00823 
00824           // flag mode
00825           cout << "testing flag mode" << endl;
00826 
00827           stringReader.rewind();
00828           stringReader.changeMode(&tag);
00829           w.clear();
00830           numInLast = stringReader.getNextSequence(w,wordLength);
00831           assert(numInLast==fiftyAs.size()%wordLength);
00832           assert((w.size()-1)*wordLength+numInLast==fiftyAs.size());
00833 
00834           for (WordSequence::iterator j(w.begin()); j!=w.end();j++)
00835           {
00836             cout << wordLength << " " << i << " " << (j-w.begin()) << " "
00837                  << *j << " " << gCursedWord << endl;
00838             if (j-w.begin()==dudPos) assert (*j==dudValue|gCursedWord);
00839             else assert (*j==0);
00840           }
00841           
00842 
00843 
00844         } // ~for i
00845 
00846 
00847       } // ~test
00848 
00849       //  cout << "for ttDNA\n";
00850       //  print(ttDNA);
00851       //  cout << "for ttProtein\n";
00852       //  print(ttProtein);
00853       //  cout << "for ttCodon\n";
00854       //  print(ttCodon);
00855 
00856       int wordLength(12);
00857 
00858       SequenceEncoderDNA encoder(wordLength);
00859 
00860       string s1="AGCcGTGGGTTTG", s2="TTGgGTTGGGGAC", s3="AGCTGaTTTGCAAGtCAA";
00861 
00862       WordSequence W;
00863 
00864       encoder.linkSeq(W);
00865       encoder.encode(s1);
00866       encoder.encode(s2);
00867       encoder.encode(s3);
00868       encoder.unlinkSeq();
00869       buffer << printBase(W,wordLength) << ends;
00870       actual=buffer.str();
00871       buffer.freeze(false); 
00872       // release memory to write further stuff to buffer  
00873       buffer.seekp(0,ios::beg); 
00874       // ensure next write is to start of buffer
00875       cout << printBase(W,wordLength) << "\n";
00876       cout << s1+s2+s3 << "\n";
00877 
00878       expected = s1+s2+s3;
00879       capitalise(expected);
00880       cout << actual << actual.size() << endl << expected << expected.size() << endl;
00881 
00882       assert(actual==expected);
00883 
00884       W.clear();
00885 
00886     }
00887     cout << "Test passed!" << endl << endl; 
00888 
00889     // ---
00890 
00891     cout << "Test " << ++numTests <<": test of SequenceReaderStringProtein" 
00892          << endl << endl;
00893 
00894     { //                 1....2....3....4....5....6....7....
00895       //      string protString("THEQUICK*R*WNF***UMPS*VERTHELA*YD*G");
00896       string protString("THEQXICK*R*WNF***XMPS*VERTHELA*YD*G");
00897 
00898       SequenceReaderStringProtein sr(protString,cout);
00899 
00900       int wordLength(5);
00901       WordSequence W, W2;
00902       sr.getNextSequence(W,wordLength);
00903 
00904       assert( sr.getNextSequence(W,wordLength) == -1 );
00905 
00906       sr.rewind();
00907 
00908       sr.getNextSequence(W2,wordLength);
00909 
00910       assert( W==W2 );
00911 
00912       string wordString;
00913 
00914       cout << "expected: " << protString << endl;
00915       cout << "actual:   " << printResidue(W,wordLength) << endl;
00916       
00917 
00918       for ( int i(0),j(0);  i < protString.size() ; i+=wordLength,j++ )
00919         {
00920           wordString = protString.substr(i, wordLength);
00921           cout << wordString << " " << printResidue(W[j],wordLength) << endl;
00922           assert(W[j]==makeResidue(wordString));
00923         }
00924 
00925 
00926       cout << "Checking getBitsPerSymbol returns 5" << endl;
00927 
00928       assert( sr.getBitsPerSymbol()==5 );
00929 
00930     }
00931     cout << "Test passed!" << endl << endl; 
00932 
00933     // ---
00934 
00935     cout << "Test " << ++numTests <<": test of SequenceEncoderProtein" 
00936          << endl << endl;
00937 
00938     {
00939 
00940       int wordLength(5);
00941 
00942       SequenceEncoderProtein encoder(wordLength);
00943 
00944       cout << "Checking getBitsPerSymbol returns 5" << endl;
00945 
00946       assert( encoder.getBitsPerSymbol()==5 );
00947 
00948       //      string s1="YNLHEPCSADKFWQMV", s2="GI*TRUnvqramyg", s3="pswhcdfluteik";
00949       string s1="YNLHEPCSADKFWQMV", s2="GI*TRXnvqramyg", s3="pswhcdflxteik";
00950 
00951       WordSequence W;
00952 
00953       encoder.linkSeq(W);
00954       encoder.encode(s1);
00955       encoder.encode(s2);
00956       encoder.encode(s3);
00957       encoder.unlinkSeq();
00958       buffer << printResidue(W,wordLength) << ends;
00959       actual=buffer.str();
00960       buffer.freeze(false); 
00961       // release memory to write further stuff to buffer  
00962       buffer.seekp(0,ios::beg); 
00963       // ensure next write is to start of buffer
00964       cout << printResidue(W,wordLength) << "\n";
00965       cout << s1+s2+s3 << "\n";
00966 
00967       expected = s1+s2+s3; 
00968       capitalise(expected);
00969       assert(actual==expected);
00970 
00971       W.clear();
00972 
00973     }
00974 
00975     cout << "Test passed!" << endl << endl; 
00976 
00977     // ---
00978 
00979     cout << "Test " << ++numTests <<": test of codonize functions" 
00980          << endl << endl;
00981 
00982     {
00983       string allCodonsDNA ="taagcatgtgatgaatttggtcacataaaacttatgaatccccagcgttcaaccgtttggtat";
00984 
00985       CodonList c1, c2;
00986       WordSequence w1,w2,w1Rev;
00987 
00988       for (int k(0); k < 15; k++)
00989       {
00990         w1.clear(); w2.clear(); w1Rev.clear();
00991       SequenceReaderString reader(allCodonsDNA);
00992         reader.rewind();
00993         reader.getNextSequence(w1,gMaxBasesPerWord);
00994         reader.rewind();
00995         reader.getNextSequence(w2,gMaxBasesPerWord-1);
00996  
00997       for (int i(0) ; i <gNumReadingFrames; i++ )
00998       {
00999         c1.clear(); c2.clear();
01000         codonize( w1, c1, i );
01001         codonizeAndFlag( w2, c2, i );
01002         cout << "c1: " << c1 << endl;
01003         cout << "c2: " << c2 << endl;
01004         assert (c1==c2);
01005       }
01006 
01007       reverseComplement(w1, w1Rev, gMaxBasesPerWord);
01008       
01009 
01010       for (int i(0) ; i <gNumReadingFrames; i++ )
01011       {
01012         c1.clear(); c2.clear();
01013         codonize( w1Rev, c1, i );
01014         codonizeAndFlagReverse( w2, c2, i );
01015         cout << "c1: " << c1 << endl;
01016         cout << "c2: " << c2 << endl;
01017         assert (c1==c2);
01018       }
01019 
01020       allCodonsDNA=allCodonsDNA.substr(0,allCodonsDNA.size()-1);
01021       } // ~for k
01022 
01023     } // ~test
01024 
01025     cout << "Test passed!" << endl << endl; 
01026 
01027 
01028     // ---
01029 
01030 #ifdef SEQUENCE_READER_CODON_NO_LONGER_USED
01031     cout << "Test " << ++numTests <<": test of SequenceReaderCodon" 
01032          << endl << endl;
01033 
01034     {
01035 
01036 
01037       int wordLength(5);
01038       const string expectedCodons((string)gCodonNames);
01039       SequenceReaderStringProtein exp(expectedCodons);
01040       WordSequence expectedSeq;
01041       exp.getNextSequence( expectedSeq, wordLength );
01042       cout << printResidue( expectedSeq, wordLength ) << endl;
01043 
01044 
01045       string allPossibleCodons;
01046       string allAs("");
01047       for ( int i(0) ; i < 4 ; i++ )
01048         for ( int j(0) ; j < 4 ; j ++ )
01049           for ( int k(0) ; k < 4 ; k ++ ) 
01050             {
01051               allPossibleCodons += gBaseNames[i];
01052               allPossibleCodons += gBaseNames[j];
01053               allPossibleCodons += gBaseNames[k];
01054             }
01055 
01056 
01057       cout << allPossibleCodons << endl;
01058 
01059       for ( int readFrame(0); readFrame < 6 ; readFrame ++ )
01060       {
01061         if (readFrame ==3) allAs="";
01062         string toBeCoded(allAs+allPossibleCodons);
01063         if (readFrame>=3) reverseString(toBeCoded);
01064      
01065 
01066         SequenceReaderString readDNA(toBeCoded,cout);
01067         SequenceReaderCodon  readCodon(readDNA,cout);
01068 
01069         assert( readCodon.getBitsPerSymbol()==5 );
01070 
01071         WordSequence w;
01072         string name;
01073 
01074         readDNA.getNextSequence( w, 12 );
01075         cout << printBase( w, 12 ) << endl;
01076         readDNA.rewind(); w.clear();
01077 
01078         for ( int perm(0) ; perm < 6 ; perm++ )
01079         {
01080           readCodon.getNextSequence( w, wordLength );
01081           readCodon.getLastSequenceName(name);
01082           cout << name << endl;
01083           cout << printResidue( w, wordLength ) << endl;        
01084           if (readFrame==perm) 
01085           {
01086             cout << printResidue( expectedSeq, wordLength ) << endl;    
01087             assert(w==expectedSeq);
01088           } // ~if
01089         } // ~for
01090         // shift read frame by 1
01091         allAs+="A";
01092         assert(readCodon.getNextSequence( w, wordLength )==-1);
01093 
01094       }
01095 
01096       cout << "testing SequenceEncoderCodon...\n";
01097 
01098  //       SequenceReaderString reader(allPossibleCodons);
01099 //        WordSequence fd,rv, fdout, rvout;
01100 //        reader.getNextSequence( fd, 16 );
01101 //        reverseComplement( fd, rv, 16 );
01102 
01103 //        SequenceEncoderCodon enc(5);
01104       
01105 //        CodonList codons;
01106 
01107 //        enc.setWordLength(5);
01108 
01109 //        enc.linkSeq(fdout);
01110 
01111 //        codonize( fd, codons, 0 );
01112 
01113 //        enc.encode( codons);
01114 
01115 //        enc.unlinkSeq();
01116 
01117 //        codons.clear();
01118 
01119 //        enc.setReverse();
01120 
01121 //        enc.linkSeq(rvout);
01122 
01123 //        codonize( rv, codons, 0 );
01124 
01125 //        enc.encode( codons );
01126       
01127 //        enc.unlinkSeq();
01128 
01129 //        string sf,sr;
01130 
01131 //        buffer << printResidue(fdout,5) << ends;
01132 //        sf=buffer.str();
01133 //        buffer.freeze(false); 
01134 //        // release memory to write further stuff to buffer  
01135 //        buffer.seekp(0,ios::beg); 
01136 
01137 //        buffer << printResidue(rvout,5) << ends;
01138 //        sr=buffer.str();
01139 //        buffer.freeze(false); 
01140 //        // release memory to write further stuff to buffer  
01141 //        buffer.seekp(0,ios::beg); 
01142 
01143 //        cout << sf << endl << sr << endl;
01144 
01145 //        reverse( sr.begin(), sr.end() );
01146 
01147 //        cout << sf << endl << sr << endl;
01148 
01149 //        assert (sf==sr);
01150 
01151 
01152 
01153     }
01154     cout << "Test passed!" << endl << endl; 
01155 #endif
01156     // ---
01157 
01158        cout << "Test " << ++numTests <<": test of SequenceReaderLocal" 
01159       << endl << endl;
01160 
01161           SequenceReaderLocal localReader(testReader,wordLength,cout);
01162 
01163           assert (    localReader.getNumSequencesInFile()
01164                ==  testReader.getNumSequencesInFile() );
01165 
01166             for ( int j(0) ; j < testReader.getNumSequencesInFile() ; j ++ ) 
01167         {
01168           wSingle.clear(); wMulti.clear(); 
01169 
01170 
01171 
01172         assert( testReader.getNextSequence(wSingle, wordLength) ==  
01173                localReader.getNextSequence(wMulti, wordLength) );
01174         testReader.getLastSequenceName(expected);
01175         testReader.getLastSequenceName(actual);
01176                 cout << expected << " " << actual << endl;
01177         assert( expected == actual );
01178                  cout << localReader.getLastSequenceNumber() << " " 
01179                       << testReader.getLastSequenceNumber() << endl;
01180  
01181         assert(    localReader.getLastSequenceNumber()
01182                 ==  testReader.getLastSequenceNumber() );
01183         assert ( wSingle == wMulti );
01184       } // ~for j
01185 
01186       assert( localReader.getNextSequence(wMulti, wordLength)== -1 );
01187 
01188 
01189         //    buffer << printWord(w,wordLength) << ends;
01190         //    actual=buffer.str();
01191         //  buffer.freeze(false); // release memory to write further stuff to buffer  
01192         //  buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
01193  
01194     cout << "Test passed!" << endl << endl; 
01195 
01196     // ---
01197 
01198    cout << "Test " << ++numTests <<": test of SequenceReaderFilter" 
01199          << endl << endl;
01200 
01201    {
01202 
01203      SequenceReader* pFilteree = new SequenceReaderFastq("test_filter.fastq");
01204      SequenceReaderFilter filterer(pFilteree,"test_filter.fail");
01205 
01206      cout << "Check details match original SeqReader..." << endl;
01207      assert(filterer.getBitsPerSymbol()==pFilteree->getBitsPerSymbol());
01208      assert(filterer.getSourceDataType()==pFilteree->getSourceDataType());
01209 
01210      cout << "Check cycles through all sequences OK..." << endl;
01211      vector<WordSequence> seqs;
01212      vector<string> names;
01213 
01214      seqs.push_back(WordSequence());
01215      names.push_back(string());
01216 
01217      while (filterer.getNextSequence(seqs.back(),16)!=-1)
01218      {
01219        filterer.getLastSequenceName(names.back());
01220        cout << names.back() << endl << printWord(seqs.back(),16) << endl;
01221        seqs.push_back(WordSequence());
01222        names.push_back(string());
01223        
01224      }
01225 
01226      cout << "Read all sequences OK" << endl;
01227 
01228      cout << "Check no sequences have been \"lost\"..." << endl; 
01229      cout << pFilteree->getNumSequencesInFile()
01230           << " seqs in original file\n";
01231      cout << filterer.getNumSequencesInFile() 
01232           << " seqs in filtered file\n";
01233      cout << filterer.getNumFiltered() 
01234           << " filtered seqs\n";
01235 
01236      assert(    pFilteree->getNumSequencesInFile() 
01237              ==   filterer.getNumSequencesInFile() 
01238                 + filterer.getNumFiltered() );
01239 
01240      
01241      cout 
01242        << "Check output of getSequence matches output of getNextSequence..."
01243        << endl;
01244      WordSequence seq;
01245      string name;
01246 
01247      for ( int i(1) ; i <= filterer.getNumSequencesInFile() ; i++ )
01248      {
01249        filterer.getSequence(seq, i, 16);
01250        filterer.getLastSequenceName(name);
01251        cout << name << endl << printWord(seq,16) << endl;
01252        assert(seq==seqs[i-1]);
01253        assert(name==names[i-1]);
01254      }
01255 
01256      cout << "Check rewind works..." << endl;
01257      filterer.rewind();
01258 
01259      assert (filterer.getNextSequence(seq,16)!=-1);
01260 
01261      assert (seq==seqs[0]);
01262 
01263      // Check won't accept dodgy sequence numbers
01264      assert( filterer.getSequence
01265              (seq, filterer.getNumSequencesInFile()+1, 16) 
01266              == -1 );
01267      assert( filterer.getSequence(seq, 0, 16) == -1 );
01268 
01269 
01270 
01271    }
01272    cout << "Test passed!" << endl << endl; 
01273 
01274    // ---
01275 
01276    cout << "Test " << ++numTests 
01277         <<": test of SourceReader functionality of SequenceReaderFile" 
01278          << endl << endl;
01279 
01280    {
01281      SequenceReaderFasta testReader("test_extract.fasta");
01282      SequenceReaderMulti testReaderMulti(cout);
01283      testReaderMulti.addReader(testReader.clone());
01284 
01285      testReader.saveIndex("test_index1");
01286      testReaderMulti.saveIndex("test_index2");
01287 
01288      //     SourceReaderIndex testIndexReader1("test_index1");
01289      SourceReaderIndex testIndexReader("test_index2");
01290 
01291 
01292      string data=
01293        "tggtaaaaaaggttttcctgcatattaatatcatggtataggtgattgctgatggtatgtctttcttttactagatcatgcactgttcaggagcagaaaaatgtggctgttttttccaccacatattgtcctactactgtgtaccctgtctcagtaaatgccagtcagttacccaagctaaatgtccttgattcttccattctttcactacttccctggttcacattccagttggccaccaggtcatacttattttatcttaaaaattctctctctgaaatgcaaatcaaaactacgatgagataccatctcaaaccagttagtttgagatggcagttattaaaaagttcaaacataacagatgctgcgaggttgtggagaaaagaaaacacttatgcactgttggtgggagtgtaaattaattcaaccattgtggaaagtagtgtggtgatttctcaaagagctgaaagcagaactaccaatcaatccagccattccattactg-gtatatacccagaggaatataaattgttctatcataaagacacatgcataggtgtgtccattgtatcactactta-catagcaaagacatgaaatcaaccta-atgcccactggtgatagac";
01294 
01295      int seqSize=data.size(); 
01296 
01297      cout << seqSize << endl; 
01298 
01299      //     vector<char> v1, v2, v1m, v2m, v1i, v2i;
01300      char* v1; 
01301      char* v2; 
01302      char* v1m; 
01303      char* v2m; 
01304      char* v1i; 
01305      char* v2i; 
01306 
01307      string n1,n2;
01308 
01309      for (int j(1) ; j <= seqSize  ; j++)
01310      {
01311        cout << "j: " << j << endl;
01312        //      v1.clear(); v2.clear(); 
01313        //    v1m.clear(); v2m.clear();
01314        //   v1i.clear(); v2i.clear();
01315 
01316        testReader.extractSource(&v1, 1, j, seqSize );
01317        testReader.extractSource(&v2, 2, j, seqSize );
01318 
01319        testReaderMulti.extractSource(&v1m, 1, j, seqSize );
01320        testReaderMulti.extractSource(&v2m, 2, j, seqSize );
01321 
01322        testIndexReader.extractSource(&v1i, 1, j, seqSize );
01323        testIndexReader.extractSource(&v2i, 2, j, seqSize );
01324 
01325        //       assert(v1==v2);
01326 
01327        //       assert(v1==v1m);
01328        //   assert(v2==v2m);
01329 
01330        //       assert(v1==v1i);
01331        //     assert(v2==v2i);
01332 
01333        for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v1[i-j]==data[i-1]);
01334        for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2[i-j]==data[i-1]);
01335        for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v1m[i-j]==data[i-1]);
01336        for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2m[i-j]==data[i-1]);
01337        for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v1i[i-j]==data[i-1]);
01338        for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2i[i-j]==data[i-1]);
01339 
01340      }
01341      cout << "got through  first bit" << endl;
01342 
01343      for (int j(1) ; j <= seqSize  ; j++)
01344      {
01345        //       v1.clear(); v2.clear(); 
01346        //     v1m.clear(); v2m.clear();
01347        //     v1i.clear(); v2i.clear();
01348 
01349        testReader.extractSource(&v1, 1, 1, j );
01350        testReader.extractSource(&v2, 2, 1, j );
01351 
01352        testReaderMulti.extractSource(&v1m, 1, 1, j );
01353        testReaderMulti.extractSource(&v2m, 2, 1, j );
01354 
01355        testIndexReader.extractSource(&v1i, 1, 1, j );
01356        testIndexReader.extractSource(&v2i, 2, 1, j );
01357 
01358        //       assert(v1==v2);
01359 
01360        //       assert(v1==v1m);
01361        //   assert(v2==v2m);
01362 
01363        //       assert(v1==v1i);
01364        //   assert(v2==v2i);
01365 
01366        for ( int i(1) ; i < j ; i++ ) assert(v1[i]==data[i]);
01367        for ( int i(1) ; i < j ; i++ ) assert(v2[i]==data[i]);
01368        for ( int i(1) ; i < j ; i++ ) assert(v1m[i]==data[i]);
01369        for ( int i(1) ; i < j ; i++ ) assert(v2m[i]==data[i]);
01370        for ( int i(1) ; i < j ; i++ ) assert(v1i[i]==data[i]);
01371        for ( int i(1) ; i < j ; i++ ) assert(v2i[i]==data[i]);
01372 
01373      }
01374      cout << "got through second bit" << endl;
01375 
01376 
01377 
01378    }
01379    cout << "Test passed!" << endl << endl; 
01380 
01381    // ---
01382 
01383    cout << "Test " << ++numTests 
01384         <<": test that SourceReader functionality works with getNextSequence" 
01385          << endl << endl;
01386 
01387    {
01388      string data=
01389        "tggtaaaaaaggttttcctgcatattaatatcatggtataggtgattgctgatggtatgtctttcttttactagatcatgcactgttcaggagcagaaaaatgtggctgttttttccaccacatattgtcctactactgtgtaccctgtctcagtaaatgccagtcagttacccaagctaaatgtccttgattcttccattctttcactacttccctggttcacattccagttggccaccaggtcatacttattttatcttaaaaattctctctctgaaatgcaaatcaaaactacgatgagataccatctcaaaccagttagtttgagatggcagttattaaaaagttcaaacataacagatgctgcgaggttgtggagaaaagaaaacacttatgcactgttggtgggagtgtaaattaattcaaccattgtggaaagtagtgtggtgatttctcaaagagctgaaagcagaactaccaatcaatccagccattccattactg-gtatatacccagaggaatataaattgttctatcataaagacacatgcataggtgtgtccattgtatcactactta-catagcaaagacatgaaatcaaccta-atgcccactggtgatagac";
01390 
01391      SequenceReaderFasta testReader("test_extract.fasta");
01392      SequenceReaderMulti testReaderMulti(cout);
01393      testReaderMulti.addReader(testReader.clone());
01394      SourceReaderIndex testIndexReader("test_index2");
01395 
01396      WordSequence w1, w2;
01397      //     vector<char> v1, v2;
01398      char* v1; char* v2;
01399 
01400      int numSeqs(4);
01401      int numChars(100);
01402 
01403      string n1,n2;
01404 
01405      for (int i(0);i<numSeqs; i++)
01406      {
01407        cout << "i: " << i << endl;
01408        w1.clear(); w2.clear();
01409 
01410        assert(testReader.getLastSequenceNumber()==i);
01411        assert(testReaderMulti.getLastSequenceNumber()==i);
01412 
01413        assert(testReader.findSequence(i+1)==true);
01414        assert(testReaderMulti.findSequence(i+1)==true);
01415 
01416        assert(testReader.getLastSequenceNumber()==i);
01417        assert(testReaderMulti.getLastSequenceNumber()==i);
01418 
01419      
01420        assert(testReader.getNextSequence( w1, 10 )!=-1);
01421        assert(testReaderMulti.getNextSequence( w2, 10 )!=-1);
01422        assert(w1==w2);
01423     
01424        assert(testReader.getLastSequenceNumber()==i+1);
01425        assert(testReaderMulti.getLastSequenceNumber()==i+1);
01426        //       cout << "fred: " << testReader.getLastSequenceNumber()
01427        // << " " << testReaderMulti.getLastSequenceNumber() << endl;
01428        testReader.getLastSequenceName(n1);
01429        testReaderMulti.getLastSequenceName(n2);
01430        assert(n1==n2);
01431        cout << n1 << " " << n1.size() << endl;
01432        cout << testIndexReader.extractName(i+1) << endl;
01433        n2=(string)testIndexReader.extractName(i+1);
01434        cout << n2 << " " << n2.size() << endl;
01435        assert(n1==n2);
01436 
01437        for ( int j(1); j <= numSeqs ; j++ )
01438        {
01439          cout << "j: " << j << endl;
01440          //      v1.clear(); v2.clear();
01441          testReader.extractSource(&v1, j, 1, numChars );
01442          testReaderMulti.extractSource(&v2, j, 1, numChars );
01443          //      assert(v1==v2);
01444 
01445          //       cout << "fred2: " << testReader.getLastSequenceNumber()
01446          // << " " << testReaderMulti.getLastSequenceNumber() << endl;
01447 
01448          assert(testReader.getLastSequenceNumber()==i+1);
01449          assert(testReaderMulti.getLastSequenceNumber()==i+1);
01450 
01451          for ( int k(0) ; k < numChars ; k++ ) assert(v1[k]==data[k]);
01452          for ( int k(0) ; k < numChars ; k++ ) assert(v2[k]==data[k]);
01453 
01454 
01455        } // ~for j
01456 
01457      } // ~for i
01458 
01459 
01460 
01461 
01462 
01463 
01464 
01465    }
01466    cout << "Test passed!" << endl << endl; 
01467 
01468    // ---
01469 
01470 
01471 
01472 
01473 
01474     return (0);
01475 
01476     } // ~try
01477     catch (const SSAHAException& err )
01478     {
01479       cout << "Caught SSAHA exception: " << err.what() << "\n";
01480       exit(1);
01481     }
01482     catch (const std::exception& err )
01483     {
01484       cout << "Caught exception: " << err.what() << "\n";
01485       exit(1);
01486     }
01487 
01488 
01489 } // main for testSequenceReaderFasta
01490 
01491 
01492 
01493 
01494 // Name:
01495 // Arguments:
01496 // TYPE  NAME  IN/OUT COMMENT
01497 // Returns: TYPE COMMENT
01498 
01499 // End of file testSequenceReaderFasta.cpp
01500 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2