SequenceReader/SequenceReader.h

Go to the documentation of this file.
00001 /*  Last edited: Apr 18 16:51 2002 (ac2) */
00002 
00003 // #######################################################################
00004 
00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00006 // Version 3.2, released 1st March 2004
00007 // Copyright (c) Genome Research 2002
00008 
00009 // SSAHA is free software; you can redistribute it and/or modify 
00010 // it under the terms of version 2 of the GNU General Public Licence
00011 // as published by the Free Software Foundation.
00012  
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public Licence for more details.
00017  
00018 // You should have received a copy of the GNU General Public Licence
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00022 
00023 // #######################################################################
00024 
00025 // Module Name  : SequenceReader
00026 // File Name    : SequenceReader.h
00027 // Language     : C++
00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00029 
00030 // Include guard:
00031 #ifndef INCLUDED_SequenceReader
00032 #define INCLUDED_SequenceReader
00033 
00034 // Description:
00035 
00036 // Includes:
00037 
00038 // NB it is good practice for #include statements in header files to be
00039 // replaced by forward declarations if at all possible
00040 #include <GlobalDefinitions.h>
00041 #include <string>
00042 #include <iosfwd>
00043 //class ostream;
00044 class WordSequence;
00045 class SequenceReader;
00046 #include <HashTable.h>
00047 
00048 // ### Class Declarations ###
00049 
00050 class NumberOutOfRange : public SSAHAException
00051 {
00052  public:
00053   NumberOutOfRange() :
00054     SSAHAException
00055     ("Requested sequence number exceeds number of sequences in file") {}
00056 };
00057 
00058 // Class Name : SequenceReaderMode
00059 // Description: SequenceReaderMode and its subclasses encapsulate the 
00060 // various policies for dealing with misread characters: ignore, replace with
00061 // another character, report to monitoring stream. Subclasses define the
00062 // actual behaviour, while SequenceReaderMode itself defines the common 
00063 // interface 
00064 class SequenceReaderMode
00065 {
00066   public: 
00067   SequenceReaderMode( ostream& monStream = cout ):
00068   monitoringStream_( monStream )
00069   {}
00070   virtual ~SequenceReaderMode() {}
00071   // Check does three things:
00072   // Possibly: if thisChar is not valid, modify it to a valid base 
00073   // Possibly: print a message if thisChar is not valid
00074   // Always: return true if thisChar is to be processed, else false
00075   // Its exact behaviour depends on the subclass
00076   //  bool check( char& thisChar )
00077   //  {
00078   //    if (     (thisChar == 'A' ) || (thisChar == 'a') 
00079   //         || (thisChar == 'C' ) || (thisChar == 'c') 
00080   //          || (thisChar == 'G' ) || (thisChar == 'g') 
00081   //         || (thisChar == 'T' ) || (thisChar == 't') ) return true;
00082   //   return mismatch(thisChar);
00083   // }
00084   virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const = 0;
00085   virtual SequenceReaderMode* clone( void ) = 0;
00086   protected:
00087   ostream& monitoringStream_;
00088 }; // ~SequenceReaderMode
00089 
00090 // Class Name : SequenceReaderModeIgnore
00091 // Description: Simplest subclass of SequenceReaderMode - if a non-base 
00092 // character is read, do nothing!
00093 class SequenceReaderModeIgnore : public SequenceReaderMode
00094 {
00095   public:
00096   SequenceReaderModeIgnore( ostream& monStream = cerr ):
00097   SequenceReaderMode( monStream )
00098   {}
00099   virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const
00100   {
00101     DEBUG_L3("SequenceReaderModeIgnore::mismatch");
00102     return false;
00103   }
00104   virtual SequenceReaderMode* clone( void )
00105   {
00106     return new SequenceReaderModeIgnore(*this);
00107   } // ~clone
00108 
00109 }; // ~SequenceReaderModeIgnore
00110 
00111 // Class Name : SequenceReaderModeReport
00112 // Description: Subclass of SequenceReaderMode - if a non-base 
00113 // character is read, report to monitoring stream
00114 class SequenceReaderModeReport : public SequenceReaderMode
00115 {
00116   public:
00117   SequenceReaderModeReport( ostream& monStream = cout ):
00118   SequenceReaderMode( monStream )
00119   {}
00120   virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const
00121   {
00122     DEBUG_L3("SequenceReaderModeReport::mismatch");
00123      
00124      monitoringStream_ << "Read unrecognized character (" 
00125                        << thisChar << ") from file" << endl;
00126 
00127      return false;
00128   } // ~mismatch
00129   virtual SequenceReaderMode* clone( void )
00130   {
00131     return new SequenceReaderModeReport(*this);
00132   } // ~clone
00133 
00134 
00135 }; // ~SequenceReaderModeReport
00136 
00137 // Class Name : SequenceReaderModeReplace
00138 // Description: Subclass of SequenceReaderMode - if a non-base 
00139 // character is read, silently replace it with a substitute
00140 class SequenceReaderModeReplace : public SequenceReaderMode
00141 {
00142   public:
00143   SequenceReaderModeReplace( uchar sub, ostream& monStream = cout ):
00144   SequenceReaderMode( monStream ), substitute_( sub )
00145   {}
00146   SequenceReaderModeReplace( const SequenceReaderModeReplace& rhs ) :
00147   substitute_( rhs.substitute_ ) {}
00148 
00149   virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const
00150   {
00151     DEBUG_L3("SequenceReaderModeReplace::mismatch");
00152      if ( isgraph(thisChar) )
00153      {
00154        thisChar = substitute_;
00155        // Bug fix TC 14.9.00: now returns true, because thisChar is now valid
00156        // as the substitution has been done
00157        return true;
00158      }
00159      else return false;
00160   } // ~mismatch
00161   virtual SequenceReaderMode* clone( void )
00162   {
00163     return new SequenceReaderModeReplace(*this);
00164   } // ~clone
00165   protected:
00166   uchar substitute_;
00167 }; // ~SequenceReaderModeReplace
00168 
00169 // Class Name : SequenceReaderModeReportReplace
00170 // Description: Subclass of SequenceReaderMode - if a non-base 
00171 // character is read, replace it with a substitute and report to 
00172 // monitoring stream
00173 class SequenceReaderModeReportReplace : public SequenceReaderMode
00174 {
00175   public:
00176   SequenceReaderModeReportReplace( uchar sub, ostream& monStream = cout ):
00177   SequenceReaderMode( monStream ), substitute_( sub )
00178   {}
00179   SequenceReaderModeReportReplace
00180   ( const SequenceReaderModeReportReplace& rhs ) :
00181   substitute_( rhs.substitute_ ) {}
00182   virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const
00183   {
00184     DEBUG_L3("SequenceReaderModeReportReplace::mismatch");
00185      if ( isgraph(thisChar) )
00186      {
00187        monitoringStream_ << "Read unrecognized character (" 
00188                          << thisChar << ") from file, replacing with '" 
00189                          << substitute_ << "'.\n";
00190        thisChar = substitute_;
00191        // Bug fix TC 14.9.00: now returns true, because thisChar is now valid
00192        // as the substitution has been done
00193        return true;
00194      }
00195      else return false;
00196   } // ~mismatch
00197   virtual SequenceReaderMode* clone( void )
00198   {
00199     return new SequenceReaderModeReportReplace(*this);
00200   } // ~clone
00201   protected:
00202   uchar substitute_;
00203 }; // ~SequenceReaderModeReportReplace
00204 
00205 // Class Name : SequenceReaderModeFlagReplace
00206 // Description: Subclass of SequenceReaderMode - if a non-base 
00207 // character is read, silently replace it with a substitute and
00208 // set wordFlag (which SequenceEncoder will OR with the Word
00209 // containing this letter)
00210 class SequenceReaderModeFlagReplace : public SequenceReaderMode
00211 {
00212   public:
00213   SequenceReaderModeFlagReplace( uchar sub, ostream& monStream = cout ):
00214   SequenceReaderMode( monStream ), substitute_( sub )
00215   {}
00216   SequenceReaderModeFlagReplace( const SequenceReaderModeFlagReplace& rhs ) :
00217   substitute_( rhs.substitute_ ) {}
00218 
00219   virtual bool mismatch( uchar& thisChar, Word& wordFlag ) const
00220   {
00221     DEBUG_L3("SequenceReaderModeFlagReplace::mismatch");
00222      if ( isgraph(thisChar) )
00223      {
00224        thisChar = substitute_;
00225        wordFlag = gCursedWord;
00226        return true;
00227      }
00228      else return false;
00229   } // ~mismatch
00230   virtual SequenceReaderMode* clone( void )
00231   {
00232     return new SequenceReaderModeFlagReplace(*this);
00233   } // ~clone
00234   protected:
00235   uchar substitute_;
00236 }; // ~SequenceReaderModeFlagReplace
00237 
00238 
00239 
00240 
00241 
00242 // -----------------------------
00243 
00244 // Class Name : SequenceReaderPrinter
00245 // Description: Purpose of this class and its subclasses is to provide an
00246 // intuitive interface between sequence information and output streams.
00247 // eg for SequenceReader myReader
00248 // cout << myReader.getName(22);
00249 // sends the name of sequence number 22 in myReader to standard output
00250 // (assuming there are at least 22 sequences!)
00251 // getName is a member object of SequenceReader that is an instance
00252 // of SequenceReaderNamePrinter. By overloading the () operator we make
00253 // it 'look like' a function.
00254 class SequenceReaderPrinter
00255 {
00256     public:
00257     // Constructor: called by the SequenceReader constructor 
00258     SequenceReaderPrinter( SequenceReader* inReader ):
00259       pReader_( inReader ) 
00260     {}
00261 
00262     SequenceReaderPrinter
00263     ( const SequenceReaderPrinter& rhs ) :
00264     pReader_( rhs.pReader_ ),
00265     seqNum_( rhs.seqNum_ ) {}
00266 
00267 
00268     SequenceReaderPrinter& operator()( SequenceNumber inSeqNum )
00269     { 
00270       seqNum_ = inSeqNum; return *this; 
00271     } // ~operator ()
00272 
00273     virtual void print( ostream& os ) = 0;
00274 
00275     friend ostream& operator<<(ostream& os, SequenceReaderPrinter& inPrinter )
00276     {
00277       inPrinter.print( os );
00278       return os;
00279     } // ~operator <<
00280   
00281     protected:
00282     SequenceReader* pReader_;
00283     SequenceNumber seqNum_;
00284 }; // ~class SequenceReaderPrinter
00285 
00286 // Class Name : SequenceReaderNamePrinter
00287 // Description: Send the name of sequence seqNum_ to the ostream os
00288 // by calling SequenceReader virtual member function printName
00289 class SequenceReaderNamePrinter: public SequenceReaderPrinter
00290 {
00291     public:
00292     SequenceReaderNamePrinter( SequenceReader* inReader ) :
00293     SequenceReaderPrinter( inReader ) {}
00294     
00295     virtual void print( ostream& os );
00296 
00297 }; // ~class SequenceReaderNamePrinter
00298 
00299 // Class Name : SequenceReaderSideInfoPrinter
00300 // Description: Send the side info for sequence seqNum_ to the ostream os
00301 // by calling SequenceReader virtual member function printSideInfo
00302 class SequenceReaderSideInfoPrinter: public SequenceReaderPrinter
00303 {
00304     public:
00305     SequenceReaderSideInfoPrinter( SequenceReader* inReader ) :
00306     SequenceReaderPrinter( inReader ) {}
00307     
00308     virtual void print( ostream& os );
00309 
00310 }; // ~class SequenceReaderSideInfoPrinter
00311 
00312 // Class Name : SequenceReaderSourcePrinter
00313 // Description: Send the source file (ie the  raw ASCII) 
00314 // for sequence seqNum_ to the ostream os by calling SequenceReader virtual 
00315 // member function printSource
00316 class SequenceReaderSourcePrinter: public SequenceReaderPrinter
00317 {
00318     public:
00319     SequenceReaderSourcePrinter( SequenceReader* inReader ) :
00320     SequenceReaderPrinter( inReader ) {}
00321    
00322     virtual void print( ostream& os );
00323 
00324 }; // ~SequenceReaderSourcePrinter
00325 
00326 
00327 // ---------------------------
00328 // Class Name : SourceReader
00329 // Description: This is an abstract class that gives access to the original
00330 // ASCII data that was used to produce the 2 bit per base sequence data
00331 class SourceReader
00332 {
00333  public:
00334   // if the position of the source buffer gets within 
00335   // resizeCacheThreshold_ of the current cache size then the
00336   // current cache size is multiplied by 1.5
00337   // The number of chars on a single line of fasta is thus 
00338   // effectively  limited to resizeCacheThreshold_ 
00339   enum
00340   { 
00341     sourceBufferSize_ = 20000, 
00342     resizeCacheThreshold_ = 5000, 
00343     nameBufferSize_ = 2000 
00344   };
00345 
00346   SourceReader( void ) : lastSourceSeqNum_(0) {}
00347 
00348   virtual ~SourceReader() {}
00349   // SeqInfo - data structure that stores the file number and position
00350   // of each indexed sequence 
00351   struct SeqIndexInfo
00352   {
00353     unsigned short fileNum;
00354     // NB this limits you to 2^32 ~= 4GB of seq in a single file
00355     // change seqPos to a std::streampos for more
00356     unsigned int seqPos;
00357   };
00358 
00359   // Function Name: extractSource
00360   // This extracts the source data for bases seqStart to seqEnd inclusive
00361   // of sequence seqNum and places it in source
00362   virtual void extractSource
00363     ( char** pSource, //vector<char>& source, 
00364     SequenceNumber seqNum,
00365     SequenceOffset seqStart,
00366     SequenceOffset seqEnd );
00367 
00368   // Function Name: extractSourceReverse
00369   // This extracts the source data using extractSource as above, then
00370   // reverse complements it.
00371   void extractSourceReverse
00372     ( char** pSource, //vector<char>& source, 
00373     SequenceNumber seqNum,
00374     SequenceOffset seqStart,
00375     SequenceOffset seqEnd );
00376 
00377   // Function Name: saveIndex
00378   // save the location of the start of each sequence in a file
00379   // sets up ofstream and calls the virtual saveIndexImp below
00380   void saveIndex( const string& fileName );
00381 
00382   // Function Name: extractToCache
00383   // Read in a sequence to lastSourceSeq_;
00384   void extractToCache( istream* pCurrentFile);
00385 
00386   // Function Name: saveIndexImp
00387   // Actually save the indexing data to disk. Implemented
00388   // for SequenceReaderFile and SequenceReaderMulti, not
00389   // for SourceReaderIndex
00390   virtual void saveIndexImp
00391   ( ostream& filesFile, 
00392     ostream& indexFile, 
00393     int& fileNumber );
00394 
00395  protected:
00396   int numCols_;
00397   // data to support SourceReader functionality
00398   vector<char> lastSourceSeq_;
00399   vector<char> reverseBuffer_;
00400   SequenceNumber lastSourceSeqNum_;
00401   char nameBuffer_[ nameBufferSize_ ];
00402 };
00403 
00404 
00405 
00406 // ---------------------------
00407 // Class Name : SourceReaderIndex
00408 // Description: Access source data using an index that gives entry points into
00409 // a collection of files
00410 
00411 
00412 class SourceReaderIndex : public SourceReader
00413 {
00414 
00415   public:
00416   SourceReaderIndex( const string& fileName );
00417   virtual ~SourceReaderIndex();
00418 
00419   virtual void extractSource( char** pSource,//vector<char>& source, 
00420                      SequenceNumber seqNum,
00421                      SequenceOffset seqStart,
00422                      SequenceOffset seqEnd );
00423 
00424   const char* extractName( SequenceNumber seqNum );
00425   // NB no definition of saveIndex in this class
00426   SequenceNumber size( void ) const { return index_.size(); }
00427  protected:
00428   vector<string*> fileNames_;
00429   vector<SeqIndexInfo> index_;
00430   SequenceNumber currentFileNum_;
00431   char inputBuffer_[ sourceBufferSize_ ];
00432   string lastName_;
00433   // reading a new sequence changes lastNameSeqNum_ (since the name has to be
00434   // read to read the sequence_) but reading a new name does not change
00435   // lastSourceSeqNum_
00436   SequenceNumber lastNameSeqNum_;
00437 
00438   ifstream* pCurrentFile_;
00439   int numSeqs_;
00440 };
00441 
00442 class SequenceReaderState;
00443 
00444 // -----------------------------
00445 
00446 // Class Name : SequenceReader
00447 // Description: This is an abstract class that specifies the interface
00448 // via which the software will read the sequence data.
00449 class SequenceReader : public SourceReader
00450 {
00451 
00452   // PUBLIC MEMBER FUNCTIONS
00453   public:
00454 
00455   //  enum Constants{ notCalculatedYet_ = -9999 };
00456 
00457   // Constructors and Destructors
00458 
00459   // Function Name: Constructor
00460   // Arguments: ostream&
00461   SequenceReader( ostream& monitoringStream = cerr );
00462 
00463   // Function Name: Copy constructor
00464   // Arguments:
00465   // A copy constructor is required for all subclasses of SequenceReader.
00466   // This is because it is used by subclass SequenceReaderMulti (which forms
00467   // an aggregate of SequenceReader instances
00468   SequenceReader( const SequenceReader& rhs );
00469 
00470 
00471   // Function Name: Destructor
00472   // Arguments:
00473   virtual ~SequenceReader(); 
00474   // (NB destructor should be virtual if class is to be derived from)
00475 
00476   // Manipulator Functions
00477   virtual SequenceReader* clone( void ) = 0;
00478 
00479 
00480   // Function Name:
00481   // Arguments:
00482   // TYPE  NAME  IN/OUT COMMENT
00483   // Returns: TYPE COMMENT
00484 
00485   // Function Name: changeMode
00486   // Arguments: const SequenceReaderMode&
00487   // Makes a copy of mode and uses it to handle mismatch character reads
00488   virtual void changeMode( SequenceReaderMode* pMode ) = 0;
00489 
00490 
00491   // Accessor Functions
00492   // (NB all accessor functions should be 'const')
00493 
00494   // Function Name: rewind
00495   // Arguments: void
00496   // Returns:   void
00497   // Rewind to the start of the data file, so that getNextSequence will
00498   // return the first sequence in the file
00499   virtual void rewind( void ) = 0;
00500 
00501   // Function Name: findSequence
00502   // Arguments: SequenceNumber (in)
00503   // Returns:   void
00504   // Winds the input file stream to the start of sequence number seqNum. 
00505   // Returns false if seqNum exceeds the number of sequences in
00506   // the file.
00507   virtual bool findSequence( SequenceNumber seqNum ) = 0;
00508 
00509   // Function Name: getNextSequence
00510   // Arguments: WordSequence& (out), int (in)
00511   // Returns:   int
00512   // Read the next set of sequence information from the file and parse it
00513   // into WordSequence format. Returns -1 if there has been a problem with
00514   // reading the sequence, else returns the number of valid base pairs 
00515   // contained within the final word of the sequence.
00516   virtual int getNextSequence( WordSequence& nextSeq, int wordLength ) = 0;
00517 
00518   // Function Name: getNextSequence
00519   // Arguments: WordSequence& (out), HashTable& (in)
00520   // Returns:   int
00521   // Read the next set of sequence information from the file and parse it
00522   // into WordSequence format, using word length required by hashTable
00523   int getNextSequence( WordSequence& nextSeq, const HashTable& hashTable );
00524 
00525   // Function Name: getSequence
00526   // Arguments: WordSequence& (out), SequenceNumber (in), int (in)
00527   // Returns:   bool
00528   // Read the sequenceNumber-th set of sequence information from the file and 
00529   // parse it into WordSequence format
00530   virtual int getSequence
00531   ( WordSequence& nextSeq, SequenceNumber sequenceNumber, int wordLength ) = 0;
00532 
00533   // Function Name: getSequence
00534   // Arguments: WordSequence& (out), int (in), const HashTable& (in)
00535   // Returns:   bool
00536   // Read the sequenceNumber-th set of sequence information from the file and 
00537   // parse it into WordSequence format, getting word length from hashTable
00538   int getSequence
00539   ( WordSequence& nextSeq, 
00540     SequenceNumber sequenceNumber, 
00541     const HashTable& hashTable );
00542 
00543   // Function Name: getLastSequenceNumber
00544   // Arguments: void
00545   // Returns:   int
00546   // Returns the position in the data file of the last sequence read
00547   SequenceNumber getLastSequenceNumber( void ) const 
00548   { 
00549     return lastSequenceNumber_; 
00550   }
00551 
00552   // Function Name: areAllSequencesRead
00553   // Arguments: void
00554   // Returns:   bool
00555   // Returns true if the end of the file has been reached
00556   bool areAllSequencesRead( void ) const
00557   {
00558     return allSequencesRead_;
00559   }
00560 
00561   // Function Name: getNumSequencesInFile
00562   // Arguments: void
00563   // Returns:   SequenceNumber
00564   // Returns the number of sequences in the file (done by lazy 
00565   // initialization, i.e. will only be calculated if asked for).
00566   // NB current place in file will be lost.
00567   SequenceNumber getNumSequencesInFile( void )
00568   {
00569     if ( allSequencesRead_ == false )
00570     {
00571       numSequencesInFile_ = computeNumSequencesInFile();
00572       allSequencesRead_ = true;
00573     } 
00574     return numSequencesInFile_;
00575   } // ~getNumSequencesInFile( void )
00576 
00577   // Function Name: getLastSequenceName
00578   // Arguments: string& (out)
00579   // Returns:   void
00580   // Fills the string with the name of the last sequence read 
00581   virtual void getLastSequenceName( string& seqName ) const = 0;
00582 
00583   // Function Name: getBitsPerSymbol
00584   // Arguments: none
00585   // Returns:   int
00586   // Returns number of bits per symbol used in encoding
00587   virtual int getBitsPerSymbol ( void ) const = 0;
00588 
00589   // Function Name: getSourceDataType
00590   // Arguments: none
00591   // Returns:   SourceDataType
00592   // Returns type of data being encoded (protein or DNA)
00593   virtual SourceDataType getSourceDataType( void ) const = 0;
00594 
00595   SequenceReaderNamePrinter getName;
00596   SequenceReaderSideInfoPrinter getSideInfo;
00597   SequenceReaderSourcePrinter getSource;
00598  
00599   // Function Name: printName
00600   // Arguments: string& (out), SequenceNumber (in)
00601   // Returns:   void
00602   // Fills a string with the name of the requested sequence
00603   virtual bool printName( ostream& os, SequenceNumber seqNum ) = 0;
00604 
00605   // Function Name: printSideInfo
00606   // Arguments: string& (out), SequenceNumber (in)
00607   // Returns:   void
00608   // Fills a string with the name of the requested sequence
00609   virtual bool printSideInfo( ostream& os, SequenceNumber seqNum ) = 0;
00610 
00611   // Function Name: printSource
00612   // Arguments: string& (out), SequenceNumber (in)
00613   // Returns:   void
00614   // Fills a string with the name of the requested sequence
00615   virtual bool printSource( ostream& os, SequenceNumber seqNum ) = 0;
00616 
00617   // encodeBases task now done by SequenceEncoder::encode - TC 14.3.1
00618   // Function Name: encodeBases
00619   // Arguments: WordSequence& (out), const TYPE& (in), int (in), int(in)
00620   // Converts sequence data from character format into binary format and 
00621   // places it in seq. Making this function a template function means that
00622   // the same function can be used to read from character arrays or strings.
00623   //  template <class TYPE>
00624   //  void encodeBases
00625   // ( WordSequence& seq, const TYPE& data, int wordLength, int numChars, 
00626   //   int& basesInSequence = 0 );
00627 
00628   //void encodeBases
00629   //( WordSequence& seq, const string& data, int wordLength, int numChars,
00630   //  int& basesInSequence );
00631   //void encodeBases
00632   //( WordSequence& seq, const char* data, int wordLength, int numChars,
00633   // int& basesInSequence );
00634 
00635   // Functions to deal with state information
00636 
00637   // Function Name: saveState
00638   // Arguments: void
00639   // Returns:   SequenceReaderState*
00640   // saves the state (ie current file position) of a SequenceReader for future
00641   // restoration
00642   virtual SequenceReaderState* saveState( void ) const 
00643   { assert (1==0); return NULL;}
00644 
00645   // Function Name: restoreState
00646   // Arguments:   SequenceReaderState*
00647   // Returns:     void
00648   // restores the state (ie current file position) of a SequenceReader
00649   // then (NB!!) deletes *pState;
00650   virtual void restoreState( SequenceReaderState* pState ) 
00651   { assert (1==0);}
00652 
00653 
00654 
00655 
00656   // PROTECTED MEMBER FUNCTIONS 
00657   // (visible to this class and derived classes only)
00658   protected:
00659 
00660   // Function Name: computeNumSequencesInFile
00661   // Arguments: void
00662   // Returns:   int
00663   // Returns the number of sequences in the file - called by 
00664   // getNumSequencesInFile. NB this will lose the current place in the file.
00665   virtual SequenceNumber computeNumSequencesInFile( void ) = 0;
00666 
00667 
00668   // PRIVATE MEMBER FUNCTIONS
00669   // (visible to instances of this class only)
00670   
00671   private:
00672   SequenceReader& operator=(const SequenceReader&);   // NOT IMPLEMENTED
00673 
00674   // PROTECTED DATA:
00675   // (visible to instances of this class only)
00676   protected:
00677 
00678   // lastSequenceNumber_ is (surprise) the number of the last sequence that 
00679   // was read, and thus indicates the current position in the file. A value
00680   // of zero indicates we are at the beginning of the file
00681   SequenceNumber lastSequenceNumber_;     
00682 
00683   // allSequencesRead_ is initially set to false. Once the end of the file
00684   // has been reached for the first time it is set to true and 
00685   // numSequencesInFile_ is filled in.
00686   bool           allSequencesRead_;
00687 
00688   // numSequencesInFile_: we want to avoid calculating this unless we
00689   // absolutely have to, as it involves scanning all the way to the end of
00690   // the file, which can be slow for large files. 
00691   SequenceNumber numSequencesInFile_;
00692   ostream&  monitoringStream_; 
00693   //  SequenceReaderMode* pState_; now handled by SequenceEncoder
00694 
00695   // PRIVATE MEMBER DATA
00696   private:
00697 
00698 }; // SequenceReader
00699 
00700 
00701 // -----------------------------
00702 // Class Name : SequenceReaderState
00703 // Description: This preserves the state (ie position in a file) of
00704 // a SequenceReader
00705 class SequenceReaderState
00706 {
00707  public:
00708   SequenceReaderState( SequenceNumber lsn ) :
00709     lastSequenceNumber_(lsn) {}
00710   virtual ~SequenceReaderState() {}
00711   const SequenceNumber lastSequenceNumber_; 
00712   // no point in making this private as it's const
00713 };
00714 
00715 
00716 
00717 
00718 // ### Function Declarations ###
00719 
00720 // Name:
00721 // Arguments:
00722 // TYPE  NAME  IN/OUT COMMENT
00723 // Returns: TYPE COMMENT
00724 
00725 // End of include guard:
00726 #endif
00727 
00728 // End of file SequenceReader.h
00729 
00730 
00731 
00732 
00733 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2