00001 /* Last edited: Apr 18 16:51 2002 (ac2) */ 00002 00003 // ####################################################################### 00004 00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm 00006 // Version 3.2, released 1st March 2004 00007 // Copyright (c) Genome Research 2002 00008 00009 // SSAHA is free software; you can redistribute it and/or modify 00010 // it under the terms of version 2 of the GNU General Public Licence 00011 // as published by the Free Software Foundation. 00012 00013 // This program is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 // GNU General Public Licence for more details. 00017 00018 // You should have received a copy of the GNU General Public Licence 00019 // along with this program; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt 00022 00023 // ####################################################################### 00024 00025 // Module Name : SequenceReader 00026 // File Name : SequenceReader.h 00027 // Language : C++ 00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk) 00029 00030 // Include guard: 00031 #ifndef INCLUDED_SequenceReader 00032 #define INCLUDED_SequenceReader 00033 00034 // Description: 00035 00036 // Includes: 00037 00038 // NB it is good practise for #include statements in header files to be 00039 // replaced by forward declarations if at all possible 00040 #include <GlobalDefinitions.h> 00041 #include <string> 00042 #include <iosfwd> 00043 //class ostream; 00044 class WordSequence; 00045 class SequenceReader; 00046 #include <HashTable.h> 00047 00048 // ### Class Declarations ### 00049 00050 class NumberOutOfRange : public SSAHAException 00051 { 00052 public: 00053 NumberOutOfRange() : 00054 SSAHAException 00055 ("Requested sequence number exceeds number of sequences in file") {} 00056 }; 00057 00058 // Class Name : SequenceReaderMode 00059 // Description: SequenceReaderMode and its subclasses encapsulate the 00060 // various policies for dealing with misread characters: ignore, replace with 00061 // another character, report to monitoring stream. Subclasses define the 00062 // actual behaviour, while SequenceReaderMode itself defines the common 00063 // interface 00064 class SequenceReaderMode 00065 { 00066 public: 00067 SequenceReaderMode( ostream& monStream = cout ): 00068 monitoringStream_( monStream ) 00069 {} 00070 virtual ~SequenceReaderMode() {} 00071 // Check does three things: 00072 // Possibly: if thisChar is not valid, modify it to a valid base 00073 // Possibly: print a message if thisChar is not valid 00074 // Always: return true if thisChar is to be processed, else false 00075 // Its exact behaviour depends on the subclass 00076 // bool check( char& thisChar ) 00077 // { 00078 // if ( (thisChar == 'A' ) || (thisChar == 'a') 00079 // || (thisChar == 'C' ) || (thisChar == 'c') 00080 // || (thisChar == 'G' ) || (thisChar == 'g') 00081 // || (thisChar == 'T' ) || (thisChar == 't') ) return true; 00082 // return mismatch(thisChar); 00083 // } 00084 virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const = 0; 00085 virtual SequenceReaderMode* clone( void ) = 0; 00086 protected: 00087 ostream& monitoringStream_; 00088 }; // ~SequenceReaderMode 00089 00090 // Class Name : SequenceReaderModeIgnore 00091 // Description: Simplest subclass of SequenceReaderMode - if a non-base 00092 // character is read, do nothing! 00093 class SequenceReaderModeIgnore : public SequenceReaderMode 00094 { 00095 public: 00096 SequenceReaderModeIgnore( ostream& monStream = cerr ): 00097 SequenceReaderMode( monStream ) 00098 {} 00099 virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const 00100 { 00101 DEBUG_L3("SequenceReaderModeIgnore::mismatch"); 00102 return false; 00103 } 00104 virtual SequenceReaderMode* clone( void ) 00105 { 00106 return new SequenceReaderModeIgnore(*this); 00107 } // ~clone 00108 00109 }; // ~SequenceReaderModeIgnore 00110 00111 // Class Name : SequenceReaderModeReport 00112 // Description: Subclass of SequenceReaderMode - if a non-base 00113 // character is read, report to monitoring stream 00114 class SequenceReaderModeReport : public SequenceReaderMode 00115 { 00116 public: 00117 SequenceReaderModeReport( ostream& monStream = cout ): 00118 SequenceReaderMode( monStream ) 00119 {} 00120 virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const 00121 { 00122 DEBUG_L3("SequenceReaderModeReport::mismatch"); 00123 00124 monitoringStream_ << "Read unrecognized character (" 00125 << thisChar << ") from file" << endl; 00126 00127 return false; 00128 } // ~mismatch 00129 virtual SequenceReaderMode* clone( void ) 00130 { 00131 return new SequenceReaderModeReport(*this); 00132 } // ~clone 00133 00134 00135 }; // ~SequenceReaderModeReport 00136 00137 // Class Name : SequenceReaderModeReplace 00138 // Description: Subclass of SequenceReaderMode - if a non-base 00139 // character is read, silently replace it with a substitute 00140 class SequenceReaderModeReplace : public SequenceReaderMode 00141 { 00142 public: 00143 SequenceReaderModeReplace( uchar sub, ostream& monStream = cout ): 00144 SequenceReaderMode( monStream ), substitute_( sub ) 00145 {} 00146 SequenceReaderModeReplace( const SequenceReaderModeReplace& rhs ) : 00147 substitute_( rhs.substitute_ ) {} 00148 00149 virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const 00150 { 00151 DEBUG_L3("SequenceReaderModeReplace::mismatch"); 00152 if ( isgraph(thisChar) ) 00153 { 00154 thisChar = substitute_; 00155 // Bug fix TC 14.9.00: now returns true, because thisChar is now valid 00156 // as the substitution has been done 00157 return true; 00158 } 00159 else return false; 00160 } // ~mismatch 00161 virtual SequenceReaderMode* clone( void ) 00162 { 00163 return new SequenceReaderModeReplace(*this); 00164 } // ~clone 00165 protected: 00166 uchar substitute_; 00167 }; // ~SequenceReaderModeReplace 00168 00169 // Class Name : SequenceReaderModeReportReplace 00170 // Description: Subclass of SequenceReaderMode - if a non-base 00171 // character is read, replace it with a substitute and report to 00172 // monitoring stream 00173 class SequenceReaderModeReportReplace : public SequenceReaderMode 00174 { 00175 public: 00176 SequenceReaderModeReportReplace( uchar sub, ostream& monStream = cout ): 00177 SequenceReaderMode( monStream ), substitute_( sub ) 00178 {} 00179 SequenceReaderModeReportReplace 00180 ( const SequenceReaderModeReportReplace& rhs ) : 00181 substitute_( rhs.substitute_ ) {} 00182 virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const 00183 { 00184 DEBUG_L3("SequenceReaderModeReportReplace::mismatch"); 00185 if ( isgraph(thisChar) ) 00186 { 00187 monitoringStream_ << "Read unrecognized character (" 00188 << thisChar << ") from file, replacing with '" 00189 << substitute_ << "'.\n"; 00190 thisChar = substitute_; 00191 // Bug fix TC 14.9.00: now returns true, because thisChar is now valid 00192 // as the substitution has been done 00193 return true; 00194 } 00195 else return false; 00196 } // ~mismatch 00197 virtual SequenceReaderMode* clone( void ) 00198 { 00199 return new SequenceReaderModeReportReplace(*this); 00200 } // ~clone 00201 protected: 00202 uchar substitute_; 00203 }; // ~SequenceReaderModeReportReplace 00204 00205 // Class Name : SequenceReaderModeFlagReplace 00206 // Description: Subclass of SequenceReaderMode - if a non-base 00207 // character is read, silently replace it with a substitute and 00208 // set wordFlag (whichSequenceEncoder will OR with the Word 00209 // containing this letter 00210 class SequenceReaderModeFlagReplace : public SequenceReaderMode 00211 { 00212 public: 00213 SequenceReaderModeFlagReplace( uchar sub, ostream& monStream = cout ): 00214 SequenceReaderMode( monStream ), substitute_( sub ) 00215 {} 00216 SequenceReaderModeFlagReplace( const SequenceReaderModeFlagReplace& rhs ) : 00217 substitute_( rhs.substitute_ ) {} 00218 00219 virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const 00220 { 00221 DEBUG_L3("SequenceReaderModeFlagReplace::mismatch"); 00222 if ( isgraph(thisChar) ) 00223 { 00224 thisChar = substitute_; 00225 wordFlag = gCursedWord; 00226 return true; 00227 } 00228 else return false; 00229 } // ~mismatch 00230 virtual SequenceReaderMode* clone( void ) 00231 { 00232 return new SequenceReaderModeFlagReplace(*this); 00233 } // ~clone 00234 protected: 00235 uchar substitute_; 00236 }; // ~SequenceReaderModeFlagReplace 00237 00238 00239 00240 00241 00242 // ----------------------------- 00243 00244 // Class Name : SequenceReaderPrinter 00245 // Description: Purpose of this class and its subclasses is to provide an 00246 // intuitive interface between sequence information and output streams. 00247 // eg for SequenceReader myReader 00248 // cout << myReader.getName(22); 00249 // sends the name of sequence number 22 in myReader to standard output 00250 // (assuming there are at least 22 sequences!) 00251 // getName is a member object of SequenceReader that is an instance 00252 // of SequenceReaderNamePrinter. By overloading the () operator we make 00253 // it 'look like' a function. 00254 class SequenceReaderPrinter 00255 { 00256 public: 00257 // Constructor: called by the SequenceReader constructor 00258 SequenceReaderPrinter( SequenceReader* inReader ): 00259 pReader_( inReader ) 00260 {} 00261 00262 SequenceReaderPrinter 00263 ( const SequenceReaderPrinter& rhs ) : 00264 pReader_( rhs.pReader_ ), 00265 seqNum_( rhs.seqNum_ ) {} 00266 00267 00268 SequenceReaderPrinter& operator()( SequenceNumber inSeqNum ) 00269 { 00270 seqNum_ = inSeqNum; return *this; 00271 } // ~operator () 00272 00273 virtual void print( ostream& os ) = 0; 00274 00275 friend ostream& operator<<(ostream& os, SequenceReaderPrinter& inPrinter ) 00276 { 00277 inPrinter.print( os ); 00278 return os; 00279 } // ~operator << 00280 00281 protected: 00282 SequenceReader* pReader_; 00283 SequenceNumber seqNum_; 00284 }; // ~class SequenceReaderPrinter 00285 00286 // Class Name : SequenceReaderNamePrinter 00287 // Description: Send the name of sequence seqNum_ to the ostream os 00288 // by calling SequenceReader virtual member function printName 00289 class SequenceReaderNamePrinter: public SequenceReaderPrinter 00290 { 00291 public: 00292 SequenceReaderNamePrinter( SequenceReader* inReader ) : 00293 SequenceReaderPrinter( inReader ) {} 00294 00295 virtual void print( ostream& os ); 00296 00297 }; // ~class SequenceReaderNamePrinter 00298 00299 // Class Name : SequenceReaderSideInfoPrinter 00300 // Description: Send the side info for sequence seqNum_ to the ostream os 00301 // by calling SequenceReader virtual member function printSideInfo 00302 class SequenceReaderSideInfoPrinter: public SequenceReaderPrinter 00303 { 00304 public: 00305 SequenceReaderSideInfoPrinter( SequenceReader* inReader ) : 00306 SequenceReaderPrinter( inReader ) {} 00307 00308 virtual void print( ostream& os ); 00309 00310 }; // ~class SequenceReaderSideInfoPrinter 00311 00312 // Class Name : SequenceReaderSideInfoPrinter 00313 // Description: Send the source file (ie the raw ASCII) 00314 // for sequence seqNum_ to the ostream os by calling SequenceReader virtual 00315 // member function printSource 00316 class SequenceReaderSourcePrinter: public SequenceReaderPrinter 00317 { 00318 public: 00319 SequenceReaderSourcePrinter( SequenceReader* inReader ) : 00320 SequenceReaderPrinter( inReader ) {} 00321 00322 virtual void print( ostream& os ); 00323 00324 }; // ~SequenceReaderSourcePrinter 00325 00326 00327 // --------------------------- 00328 // Class Name : SourceReader 00329 // Description: This is an abstract class that gives access to the original 00330 // ASCII data that was used to produce the 2 bit per base sequence data 00331 class SourceReader 00332 { 00333 public: 00334 // if the position of the source buffer gets within 00335 // resizeCacheThreshold_ of the current cache size then the 00336 // current cache size is multiplied by 1.5 00337 // The number of chars on a single line of fasta is thus 00338 // effectively limited to resizeCacheThreshold_ 00339 enum 00340 { 00341 sourceBufferSize_ = 20000, 00342 resizeCacheThreshold_ = 5000, 00343 nameBufferSize_ = 2000 00344 }; 00345 00346 SourceReader( void ) : lastSourceSeqNum_(0) {} 00347 00348 virtual ~SourceReader() {} 00349 // SeqInfo - data structure that stores the file number and position 00350 // of each indexed sequence 00351 struct SeqIndexInfo 00352 { 00353 unsigned short fileNum; 00354 // NB this limits you to 2^32 ~= 4GB of seq in a single file 00355 // change seqPos to a std::streampos for more 00356 unsigned int seqPos; 00357 }; 00358 00359 // Function Name: extractSource 00360 // This extracts the source data for bases seqStart to seqEnd inclusive 00361 // of sequence seqNum and places it in source 00362 virtual void extractSource 00363 ( char** pSource, //vector<char>& source, 00364 SequenceNumber seqNum, 00365 SequenceOffset seqStart, 00366 SequenceOffset seqEnd ); 00367 00368 // Function Name: extractSourceReverse 00369 // This extracts the source data using extractSource as above, then 00370 // reverse complements it. 00371 void extractSourceReverse 00372 ( char** pSource, //vector<char>& source, 00373 SequenceNumber seqNum, 00374 SequenceOffset seqStart, 00375 SequenceOffset seqEnd ); 00376 00377 // Function Name: saveIndex 00378 // save the location of the start of each sequence in a file 00379 // sets up ofstream and calls the virtual saveIndexImp below 00380 void saveIndex( const string& fileName ); 00381 00382 // Function Name: extractToCache 00383 // Read in a sequence to lastSourceSeq_; 00384 void extractToCache( istream* pCurrentFile); 00385 00386 // Function Name: saveIndexImp 00387 // Actually save the indexing data to disk. Implemented 00388 // for SequenceReaderFile and SequenceReaderMulti, not 00389 // for SourceReaderIndex 00390 virtual void saveIndexImp 00391 ( ostream& filesFile, 00392 ostream& indexFile, 00393 int& fileNumber ); 00394 00395 protected: 00396 int numCols_; 00397 // data to support SourceReader functionality 00398 vector<char> lastSourceSeq_; 00399 vector<char> reverseBuffer_; 00400 SequenceNumber lastSourceSeqNum_; 00401 char nameBuffer_[ nameBufferSize_ ]; 00402 }; 00403 00404 00405 00406 // --------------------------- 00407 // Class Name : SourceReaderIndex 00408 // Description: Access source data using an index that gives entry points into 00409 // a collection of files 00410 00411 00412 class SourceReaderIndex : public SourceReader 00413 { 00414 00415 public: 00416 SourceReaderIndex( const string& fileName ); 00417 virtual ~SourceReaderIndex(); 00418 00419 virtual void extractSource( char** pSource,//vector<char>& source, 00420 SequenceNumber seqNum, 00421 SequenceOffset seqStart, 00422 SequenceOffset seqEnd ); 00423 00424 const char* extractName( SequenceNumber seqNum ); 00425 // NB no definition of saveIndex in this class 00426 SequenceNumber size( void ) const { return index_.size(); } 00427 protected: 00428 vector<string*> fileNames_; 00429 vector<SeqIndexInfo> index_; 00430 SequenceNumber currentFileNum_; 00431 char inputBuffer_[ sourceBufferSize_ ]; 00432 string lastName_; 00433 // reading a new sequence changes lastNameSeqNum_ (since the name has to be 00434 // read to read the sequence_) but reading a new name does not change 00435 // lastSourceSeqNum_ 00436 SequenceNumber lastNameSeqNum_; 00437 00438 ifstream* pCurrentFile_; 00439 int numSeqs_; 00440 }; 00441 00442 class SequenceReaderState; 00443 00444 // ----------------------------- 00445 00446 // Class Name : SequenceReader 00447 // Description: This is an abstract class that specifies the interface via 00448 // via which the software will read the sequence data. 00449 class SequenceReader : public SourceReader 00450 { 00451 00452 // PUBLIC MEMBER FUNCTIONS 00453 public: 00454 00455 // enum Constants{ notCalculatedYet_ = -9999 }; 00456 00457 // Constructors and Destructors 00458 00459 // Function Name: Constructor 00460 // Arguments: ostream& 00461 SequenceReader( ostream& monitoringStream = cerr ); 00462 00463 // Function Name: Copy constructor 00464 // Arguments: 00465 // A copy constructor is required for all subclasses of SequenceReader. 00466 // This is because it is used by subclass SequenceReaderMulti (which forms 00467 // an aggregate of SequenceReader instances 00468 SequenceReader( const SequenceReader& rhs ); 00469 00470 00471 // Function Name: Destructor 00472 // Arguments: 00473 virtual ~SequenceReader(); 00474 // (NB destructor should be virtual if class is to be derived from) 00475 00476 // Manipulator Functions 00477 virtual SequenceReader* clone( void ) = 0; 00478 00479 00480 // Function Name: 00481 // Arguments: 00482 // TYPE NAME IN/OUT COMMENT 00483 // Returns: TYPE COMMENT 00484 00485 // Function Name: changeMode 00486 // Arguments: const SequenceReaderMode& 00487 // Makes a copy of mode and uses it to handle mismatch character reads 00488 virtual void changeMode( SequenceReaderMode* pMode ) = 0; 00489 00490 00491 // Accessor Functions 00492 // (NB all accessor functions should be 'const') 00493 00494 // Function Name: rewind 00495 // Arguments: void 00496 // Returns: void 00497 // Rewind to the start of the data file, so that getNextSequence will 00498 // return the first sequence in the file 00499 virtual void rewind( void ) = 0; 00500 00501 // Function Name: findSequence 00502 // Arguments: SequenceNumber (in) 00503 // Returns: void 00504 // Winds the input file stream to the start of sequence number seqNum. 00505 // Returns false if seqNum exceeds the number of sequences in 00506 // the file. 00507 virtual bool findSequence( SequenceNumber seqNum ) = 0; 00508 00509 // Function Name: getNextSequence 00510 // Arguments: WordSequence& (out), int (in) 00511 // Returns: int 00512 // Read the next set of sequence information from the file and parse it 00513 // into WordSequence format. Returns -1 if there has been a problem with 00514 // reading the sequence, else returns the number of valid base pairs 00515 // contained within the final word of the sequence. 00516 virtual int getNextSequence( WordSequence& nextSeq, int wordLength ) = 0; 00517 00518 // Function Name: getNextSequence 00519 // Arguments: WordSequence& (out), HashTable& (in) 00520 // Returns: int 00521 // Read the next set of sequence information from the file and parse it 00522 // into WordSequence format, using word length required by hashTable 00523 int getNextSequence( WordSequence& nextSeq, const HashTable& hashTable ); 00524 00525 // Function Name: getSequence 00526 // Arguments: WordSequence& (out), SequenceNumber (in), int (in) 00527 // Returns: bool 00528 // Read the sequenceNumber-th set of sequence information from the file and 00529 // parse it into WordSequence format 00530 virtual int getSequence 00531 ( WordSequence& nextSeq, SequenceNumber sequenceNumber, int wordLength ) = 0; 00532 00533 // Function Name: getSequence 00534 // Arguments: WordSequence& (out), int (in), const HashTable& (in) 00535 // Returns: bool 00536 // Read the sequenceNumber-th set of sequence information from the file and 00537 // parse it into WordSequence format, getting word length from hashTable 00538 int getSequence 00539 ( WordSequence& nextSeq, 00540 SequenceNumber sequenceNumber, 00541 const HashTable& hashTable ); 00542 00543 // Function Name: getLastSequenceNumber 00544 // Arguments: void 00545 // Returns: int 00546 // Returns the position in the data file of the last sequence read 00547 SequenceNumber getLastSequenceNumber( void ) const 00548 { 00549 return lastSequenceNumber_; 00550 } 00551 00552 // Function Name: areAllSequencesRead 00553 // Arguments: void 00554 // Returns: bool 00555 // Returns true if the end of the file has been reached 00556 bool areAllSequencesRead( void ) const 00557 { 00558 return allSequencesRead_; 00559 } 00560 00561 // Function Name: getNumSequencesInFile 00562 // Arguments: void 00563 // Returns: SequenceNumber 00564 // Returns the number of sequences in the file (done by lazy 00565 // initialization, i.e. will only be calculated if asked for). 00566 // NB current place in file will be lost. 00567 SequenceNumber getNumSequencesInFile( void ) 00568 { 00569 if ( allSequencesRead_ == false ) 00570 { 00571 numSequencesInFile_ = computeNumSequencesInFile(); 00572 allSequencesRead_ = true; 00573 } 00574 return numSequencesInFile_; 00575 } // ~getNumSequencesInFile( void ) 00576 00577 // Function Name: getLastSequenceName 00578 // Arguments: string& (out) 00579 // Returns: void 00580 // Fills the string with the name of the last sequence read 00581 virtual void getLastSequenceName( string& seqName ) const = 0; 00582 00583 // Function Name: getBitsPerSymbol 00584 // Arguments: none 00585 // Returns: int 00586 // Returns number of bits per symbol used in encoding 00587 virtual int getBitsPerSymbol ( void ) const = 0; 00588 00589 // Function Name: getSourceDataType 00590 // Arguments: none 00591 // Returns: SourceDataType 00592 // Returns type of data being encoded (protein or DNA) 00593 virtual SourceDataType getSourceDataType( void ) const = 0; 00594 00595 SequenceReaderNamePrinter getName; 00596 SequenceReaderSideInfoPrinter getSideInfo; 00597 SequenceReaderSourcePrinter getSource; 00598 00599 // Function Name: printName 00600 // Arguments: string& (out), SequenceNumber (in) 00601 // Returns: void 00602 // Fills a string with the name of the requested sequence 00603 virtual bool printName( ostream& os, SequenceNumber seqNum ) = 0; 00604 00605 // Function Name: printSideInfo 00606 // Arguments: string& (out), SequenceNumber (in) 00607 // Returns: void 00608 // Fills a string with the name of the requested sequence 00609 virtual bool printSideInfo( ostream& os, SequenceNumber seqNum ) = 0; 00610 00611 // Function Name: printSource 00612 // Arguments: string& (out), SequenceNumber (in) 00613 // Returns: void 00614 // Fills a string with the name of the requested sequence 00615 virtual bool printSource( ostream& os, SequenceNumber seqNum ) = 0; 00616 00617 // encodeBases task now done by SequenceEncoder::encode - TC 14.3.1 00618 // Function Name: encodeBases 00619 // Arguments: WordSequence& (out), const TYPE& (in), int (in), int(in) 00620 // Converts sequence data from character format into binary format and 00621 // places it in seq. Making this function a template function means that 00622 // the same function can be used to read from character arrays or strings. 00623 // template <class TYPE> 00624 // void encodeBases 00625 // ( WordSequence& seq, const TYPE& data, int wordLength, int numChars, 00626 // int& basesInSequence = 0 ); 00627 00628 //void encodeBases 00629 //( WordSequence& seq, const string& data, int wordLength, int numChars, 00630 // int& basesInSequence ); 00631 //void encodeBases 00632 //( WordSequence& seq, const char* data, int wordLength, int numChars, 00633 // int& basesInSequence ); 00634 00635 // Functions to deal with state information 00636 00637 // Function Name: saveState 00638 // Arguments: void 00639 // Returns: SequenceReaderState* 00640 // saves the state (ie current file position) of a SequenceReader for future 00641 // restoration 00642 virtual SequenceReaderState* saveState( void ) const 00643 { assert (1==0); return NULL;} 00644 00645 // Function Name: restoreState 00646 // Arguments: SequenceReaderState* 00647 // Returns: void 00648 // restores the state (ie current file position) of a SequenceReader 00649 // then (NB!!) deletes *pState; 00650 virtual void restoreState( SequenceReaderState* pState ) 00651 { assert (1==0);} 00652 00653 00654 00655 00656 // PROTECTED MEMBER FUNCTIONS 00657 // (visible to this class and derived classes only) 00658 protected: 00659 00660 // Function Name: computeNumSequencesInFile 00661 // Arguments: void 00662 // Returns: int 00663 // Returns the number of sequences in the file - called by 00664 // getNumSequencesInFile. NB this will lose the current place in the file. 00665 virtual SequenceNumber computeNumSequencesInFile( void ) = 0; 00666 00667 00668 // PRIVATE MEMBER FUNCTIONS 00669 // (visible to instances of this class only) 00670 00671 private: 00672 SequenceReader& operator=(const SequenceReader&); // NOT IMPLEMENTED 00673 00674 // PROTECTED DATA: 00675 // (visible to instances of this class only) 00676 protected: 00677 00678 // lastSequenceNumber_ is (surprise) the number of the last sequence that 00679 // was read, and thus indicates the current position in the file. A value 00680 // of zero indicates we are at the beginning of the file 00681 SequenceNumber lastSequenceNumber_; 00682 00683 // allSequencesRead_ is initially set to false. Once the end of the file 00684 // has been reached for the first time it is set to true and 00685 // numSequencesInFile_ is filled in. 00686 bool allSequencesRead_; 00687 00688 // numSequencesInFile_: we want to avoid calculating this unless we 00689 // absolutely have to, as it involves scanning all the way to the end of 00690 // the file, which can be slow for large files. 00691 SequenceNumber numSequencesInFile_; 00692 ostream& monitoringStream_; 00693 // SequenceReaderMode* pState_; now handled by SequenceEncoder 00694 00695 // PRIVATE MEMBER DATA 00696 private: 00697 00698 }; // SequenceReader 00699 00700 00701 // ----------------------------- 00702 // Class Name : SequenceReaderState 00703 // Description: This preserves the state (ie position in a file) of 00704 // a SequenceReader 00705 class SequenceReaderState 00706 { 00707 public: 00708 SequenceReaderState( SequenceNumber lsn ) : 00709 lastSequenceNumber_(lsn) {} 00710 virtual ~SequenceReaderState() {} 00711 const SequenceNumber lastSequenceNumber_; 00712 // no point in making this private as it's const 00713 }; 00714 00715 00716 00717 00718 // ### Function Declarations ### 00719 00720 // Name: 00721 // Arguments: 00722 // TYPE NAME IN/OUT COMMENT 00723 // Returns: TYPE COMMENT 00724 00725 // End of include guard: 00726 #endif 00727 00728 // End of file SequenceReader.h 00729 00730 00731 00732 00733
1.5.2