HashTable/testHashTable.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : testHashTable
00025 // File Name    : testHashTable.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 #include "HashTable.h"
00034 #include "GlobalDefinitions.h"
00035 #include "TimeStamp.h"
00036 #include "assert.h"
00037 #include <string>
00038 #include <strstream>
00039 #include <iostream>
00040 
00041 // ### Function Definitions ###
00042 
00043 // Create a dummy version of hash table 
00044 class HashTableTest : public HashTable
00045 {
00046 public:
00047   HashTableTest( int wordLength )
00048   {
00049     wordLength_ = wordLength;
00050   }
00051 
00052   //  void createHashTable
00053   //  ( SequenceReader& sequenceReader, int wordLength, int maxNumHits )
00054   //  {
00055   //  }
00056 
00057   // Function Name: loadHashTable
00058   // Arguments: const string& (in)
00059   // Reads a pre-computed hash table into memory from a file
00060   void loadHashTable( const string& fileNameRoot )
00061   {}
00062 
00063   // Function Name: saveHashTable
00064   // Arguments: const string& (in)
00065   // Saves a hash table to a file (for subsequent retrieval by loadHashTable)
00066   void saveHashTable( const string& fileNameRoot )
00067   {}
00068   
00069   // Accessor Functions
00070   // (NB all accessor functions should be 'const')
00071 
00072   void getSequenceName(string& seqName, SequenceNumber seqNum) const 
00073   {}
00074 
00075 
00076   // Function Name: matchWord
00077   // Arguments: Word (in), HitList& (out)
00078   // Populates hitsFound with the positions in the subject sequence database 
00079   // of all occurrences of the Word queryWord.
00080   //  void matchWord
00081   // ( Word queryWord, HitList& hitsFound, int baseOffset=0 ) const
00082   // {}
00083 
00084   // Function Name: matchWord
00085   // Arguments: WordSequence& (in), HitList& (out)
00086   // Populates hitsFound with the positions in the database of all occurrences
00087   // of the Words in the WordSequence queryWords. baseOffset is the initial 
00088   // shift in base pairs to be subtracted from all hit positions (TBD explain
00089   // this better!)
00090   //  void matchWord
00091   // ( const WordSequence& queryWords, 
00092   //   HitList& hitsFound, 
00093   //  int baseOffset = 0 ) const
00094   // {}
00095 
00096   void countWords(SequenceAdapter &) {}
00097   void hashWords(SequenceAdapter &, unsigned int, unsigned int *) {}
00098 
00099 
00100 };
00101 
00102 //void HashTableView<PositionInDatabase>::hashWords
00103 //void HashTable::hashWords
00104 //(SequenceAdapter &, unsigned int, unsigned int *)
00105 //{}
00106 //void HashTableView<PositionInDatabase>::countWords
00107 //void HashTable::countWords
00108 //(SequenceAdapter &)    
00109 //{}
00110 
00111 
00112 
00113 
00114 int main( void )
00115 {
00116 
00117   Timer clock;
00118 
00119   int numTests = 0;
00120 
00121   cout << "*************************************" << endl << endl;
00122   cout << "Test of functions in module HashTable" << endl << endl;
00123   cout << "*************************************" << endl << endl;
00124 
00125   const int wordLength(10);
00126   std::ostrstream buffer;
00127   string s1,s2,s3,s4,s5;
00128   Word w1,w2,w3,w4,w5;
00129 
00130   // ---
00131 
00132   cout << "Test " << ++numTests <<": test of function printWord" 
00133        << endl << endl;
00134 
00135   w1 = 237148; 
00136   // This is binary 00 11 10 01 11 10 01 01 11 00
00137   // encodes as     A  T  G  C  T  G  C  C  T  A
00138 
00139   buffer << printWord(w1,wordLength) << ends;
00140   s1=buffer.str();
00141   buffer.freeze(false); // release memory to write further stuff to buffer  
00142   buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00143  
00144   cout << "Word " << w1 << " encodes as "<< s1 << endl;
00145 
00146   assert(s1=="ATGCTGCCTA");
00147     
00148   cout << "Test passed!" << endl << endl; 
00149 
00150   // ---
00151 
00152   cout << "Test " << ++numTests <<": test of function makeWord" 
00153        << endl << endl;
00154 
00155 
00156   s2 = "ATGCTGCCTA";
00157   w2 = makeWord(s2);         
00158   cout << "String " << s2 << " encodes as " << w2 << endl;
00159   assert(w1==w2);
00160 
00161   s2 = "AtGcTgCcTA";
00162   w2 = makeWord(s2);         
00163   cout << "String " << s2 << " encodes as " << w2 << endl;
00164   assert(w1==w2);
00165     
00166   cout << "Test passed!" << endl << endl; 
00167 
00168   // ---
00169 
00170   cout << "Test " << ++numTests <<": test of HashTableTest creation" 
00171        << endl << endl;
00172 
00173   HashTableTest ht(wordLength);
00174   assert( ht.getWordLength() == wordLength );
00175 
00176   cout << "Test passed!" << endl << endl; 
00177 
00178   // ---
00179 
00180   cout << "Test " << ++numTests <<": test of WordSequence" 
00181        << endl << endl;
00182 
00183   s1="AGTTCGTCCA";  s2="TGCTAAGTCA";  s3="GGTCATTGCA";
00184   s4="CACGTGCACG";  s5="AGCTGGCTGG";
00185 
00186   w1 = makeWord(s1);  w2 = makeWord(s2);  w3 = makeWord(s3);
00187   w4 = makeWord(s4);  w5 = makeWord(s5);
00188 
00189   cout << "Words " << w1 << " "<< w2 << " "<< w3 << " "<< w4 << " "<< w5 
00190        << endl;
00191 
00192   WordSequence ws;
00193 
00194   ws.addWord(w1);  ws.addWord(w2);  ws.addWord(w3);  ws.addWord(w4);
00195   ws.addWord(w5);
00196   ws.setNumBasesInLast(10);// %%%%%
00197 
00198   cout << " encode as word sequence " << ws.getWord(0) << " " 
00199        << ws.getWord(1) << " " << ws.getWord(2) << " " 
00200        << ws.getWord(3) << " " << ws.getWord(4) << endl;
00201 
00202   assert(ws.getNumWords() == 5);
00203   assert(ws.getWord(0) == w1);
00204   assert(ws.getWord(1) == w2);
00205   assert(ws.getWord(2) == w3);
00206   assert(ws.getWord(3) == w4);
00207   assert(ws.getWord(4) == w5);
00208 
00209   buffer << printWord(ws,wordLength) << ends;
00210   string S(buffer.str());
00211   buffer.freeze(false); // release memory to write further stuff to buffer  
00212   buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00213 
00214   cout << "Words " << s1 << ","<< s2 << ","<< s3 << ","<< s4 << ","<< s5 
00215        << " encode to " << S << endl;
00216   assert(S==s1+s2+s3+s4+s5);
00217 
00218   cout << "Test passed!" << endl << endl; 
00219 
00220   // ---
00221 
00222   cout << "Test " << ++numTests <<": test of function reverseComplement" 
00223        << endl << endl;
00224 
00225 
00226   Word r = reverseComplement(w1,wordLength);
00227 
00228   cout << "Reverse complement of " << printWord(w1,ht)
00229        << " computed as " << printWord(r,ht) << endl;
00230   // NB Assumes answer is RC of s1 as defined above
00231   assert(r==makeWord("TGGACGAACT"));
00232 
00233   cout << "Reverse complement of WordSequence" << endl;
00234 
00235   string R;
00236 
00237     for ( string::reverse_iterator i(S.rbegin()) ; i != S.rend() ; i++ )
00238     {
00239            if ( *i == 'A' ) R += "T";
00240       else if ( *i == 'T' ) R += "A";
00241       else if ( *i == 'G' ) R += "C";
00242       else if ( *i == 'C' ) R += "G";
00243     } // ~for
00244 
00245    WordSequence rs;
00246    reverseComplement(ws,rs,wordLength);
00247 
00248    cout << "Original string: " << printWord(ws,ht) << endl
00249         << "Computed RC    : " << printWord(rs,ht) << endl
00250         << "Should be      : " << R << endl;
00251 
00252   buffer << printWord(rs,ht) << ends;
00253   assert(R==buffer.str());  
00254   buffer.freeze(false); // release memory to write further stuff to buffer  
00255   buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00256 
00257   cout << "Reverse complement of WordSequence, "
00258        << "assuming only 7 valid base pairs in last Word" << endl;
00259 
00260   WordSequence ws2(ws);
00261   ws2.back()=makeWord("AGCTGGCAAA");
00262   ws2.setNumBasesInLast(7);
00263 
00264   rs.clear();
00265   reverseComplement(ws2,rs,wordLength); //,7);
00266   R.erase(R.begin());  R.erase(R.begin());  R.erase(R.begin());
00267   //  R+="AAA";
00268 
00269   cout << "Original string: " << printWord(ws2,ht) << endl
00270        << "Computed RC    : " << printWord(rs,ht) << endl
00271        << "Should be      : " << R << endl;
00272 
00273   buffer << printWord(rs,ht) << ends;
00274   assert(R==buffer.str());  
00275   buffer.freeze(false); // release memory to write further stuff to buffer  
00276   buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00277 
00278   cout << "Test passed!" << endl << endl; 
00279 
00280   // ---
00281 
00282   cout << "Test " << ++numTests <<": test of function shiftSequence" 
00283        << endl << endl;
00284 
00285   cout << "Shift sequence by one base pair" << endl;
00286 
00287   S.erase(S.begin());
00288   S+=("A");
00289 
00290   shiftSequence(ws,gBaseBits,wordLength);
00291   buffer << printWord(ws,wordLength) << ends;
00292 
00293   cout << "Shifted sequence: " << printWord(ws,wordLength) << endl
00294        << "should be:        " << S << endl; 
00295 
00296   assert(S==buffer.str());  
00297   buffer.freeze(false); // release memory to write further stuff to buffer  
00298   buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00299 
00300   cout << "Shift sequence by another three base pairs" << endl;
00301 
00302   S.erase(S.begin());
00303   S.erase(S.begin());
00304   S.erase(S.begin());
00305   S+=("AAA");
00306 
00307   shiftSequence(ws,gBaseBits,wordLength,3);
00308   buffer << printWord(ws,wordLength) << ends;
00309 
00310   cout << "Shifted sequence: " << printWord(ws,wordLength) << endl
00311        << "should be:        " << S << endl; 
00312 
00313   assert(S==buffer.str());  
00314   buffer.freeze(false); // release memory to write further stuff to buffer  
00315   buffer.seekp(0,ios::beg); // ensure next write is to start of buffer
00316 
00317     
00318   cout << "Test passed!" << endl << endl; 
00319 
00320   cout << clock;
00321 
00322   for ( int i(0) ; i < 1000000 ; i ++ ) shiftSequence(ws,gBaseBits,3);
00323 
00324   cout << clock;
00325 
00326   cout << "*************************************" << endl << endl;
00327   cout << "           End of last test          " << endl << endl;
00328   cout << "*************************************" << endl << endl;
00329 
00330  
00331   return (0);
00332 
00333 }
00334 
00335 
00336 
00337 
00338 // End of file testHashTable.cpp
00339 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2