SequenceReader/SequenceReaderString.h

Go to the documentation of this file.
00001 /*  Last edited: Jan 14 13:32 2002 (ac2) */
00002 
00003 // #######################################################################
00004 
00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00006 // Version 3.2, released 1st March 2004
00007 // Copyright (c) Genome Research 2002
00008 
00009 // SSAHA is free software; you can redistribute it and/or modify 
00010 // it under the terms of version 2 of the GNU General Public Licence
00011 // as published by the Free Software Foundation.
00012  
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public Licence for more details.
00017  
00018 // You should have received a copy of the GNU General Public Licence
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00022 
00023 // #######################################################################
00024 
00025 // Module Name  : SequenceReaderString
00026 // File Name    : SequenceReaderString.h
00027 // Language     : C++
00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00029 
00030 // Include guard:
00031 #ifndef INCLUDED_SequenceReaderString
00032 #define INCLUDED_SequenceReaderString
00033 
00034 // Description:
00035 
00036 // Includes:
00037 
00038 #include "SequenceReader.h"
00039 #include <string>
00040 // NB it is good practice for #include statements in header files to be
00041 // replaced by forward declarations if at all possible
00042 class WordSequence;
00043 //class SequenceEncoder;
00044 #include "SequenceEncoder.h"
00045 // ### Class Declarations ###
00046 
00047 
00048 // Class Name : SequenceReaderStringBase
00049 // Description: 
00050 
00051 class SequenceReaderStringBase : public SequenceReader
00052 {
00053 
00054   // PUBLIC MEMBER FUNCTIONS
00055   public:
00056 
00057   // Constructors and Destructors
00058 
00059   // Function Name:
00060   // Arguments:
00061   // TYPE  NAME  IN/OUT COMMENT
00062   // Returns: TYPE COMMENT
00063 SequenceReaderStringBase
00064 ( const string& sequenceString, 
00065   // NB SequenceReaderStringBase takes ownership of *pEncoder
00066   SequenceEncoder* pEncoder,
00067   ostream& monitoringStream = cerr ) :
00068   sequenceString_( sequenceString ), 
00069   pEncoder_( pEncoder ),
00070   SequenceReader( monitoringStream )
00071 {
00072   monitoringStream_ << "constructing SequenceReaderStringBase" << endl;
00073 } // constructor
00074 
00075 
00076   // Function Name:
00077   // Arguments:
00078   // TYPE  NAME  IN/OUT COMMENT
00079   // Returns: TYPE COMMENT
00080 SequenceReaderStringBase( const SequenceReaderStringBase& rhs ) :
00081 sequenceString_( rhs.sequenceString_ ), 
00082 pEncoder_( rhs.pEncoder_->clone() ),
00083 SequenceReader( rhs.monitoringStream_ )
00084 {
00085   monitoringStream_ << "copy constructing SequenceReaderStringBase" << endl;
00086 } // copy constructor
00087 
00088 
00089 
00090   // Function Name:
00091   // Arguments:
00092   // TYPE  NAME  IN/OUT COMMENT
00093   // Returns: TYPE COMMENT
00094 virtual ~SequenceReaderStringBase()
00095 {
00096   monitoringStream_ << "destructing SequenceReaderStringBase" << endl;
00097   delete pEncoder_;
00098 } // destructor
00099 
00100   // (NB destructor should be virtual if class is to be derived from)
00101 
00102   // Manipulator Functions
00103   virtual SequenceReader* clone( void ) 
00104   { return new SequenceReaderStringBase( *this ); }
00105 
00106   // Function Name:
00107   // Arguments:
00108   // TYPE  NAME  IN/OUT COMMENT
00109   // Returns: TYPE COMMENT
00110 
00111   // Function Name: changeMode
00112   // Arguments: const SequenceReaderMode&
00113   // Makes a copy of mode and uses it to handle mismatch character reads
00114 virtual void changeMode( SequenceReaderMode* pMode )
00115 {
00116   pEncoder_->changeMode( pMode );
00117   //  pEncoder_->changeMode( pMode );
00118 }
00119 
00120 
00121 
00122 
00123 
00124   // Accessor Functions
00125   // (NB all accessor functions should be 'const')
00126 
00127   // Function Name: rewind
00128   // Arguments: void
00129   // Returns:   void
00130   // Rewind to the start of the data file, so that getNextSequence will
00131   // return the first sequence in the file
00132   void rewind( void )
00133   { lastSequenceNumber_ = 0; } // ~rewind
00134 
00135   // Function Name: findSequence
00136   // Arguments: SequenceNumber (in)
00137   // Returns:   void
00138   // Winds the input file stream to the start of sequence number seqNum. 
00139   // Returns false if seqNum exceeds the number of sequences in
00140   // the file.
00141 virtual bool findSequence( SequenceNumber seqNum ) 
00142 {
00143   return (seqNum == 1);
00144 } 
00145 
00146 
00147   // Function Name: getNextSequence
00148   // Arguments: WordSequence& (out), int (in)
00149   // Returns:   int
00150   // Read the set of sequence information from the string and parse it
00151   // into WordSequence format. Returns -1 if a problem, else the number of
00152   // valid base pairs in the final word of the sequence
00153 virtual int getNextSequence( WordSequence& nextSeq, int wordLength );
00154 
00155 
00156 
00157 // Function Name: getSequence
00158 // Arguments: WordSequence& (out), SequenceNumber (in), int (in)
00159 // Returns:   int
00160 // Read the sequenceNumber-th set of sequence information from the file and 
00161 // parse it into WordSequence format
00162 virtual int getSequence
00163 ( WordSequence& nextSeq, SequenceNumber sequenceNumber, int wordLength )
00164 {
00165   // only one sequence in a string, so just ignore sequenceNumber
00166   return getNextSequence( nextSeq, wordLength );
00167 } // ~getSequence
00168  
00169   // Function Name: getLastSequenceName
00170   // Arguments: string& (out)
00171   // Returns:   void
00172   // Fills the string with the name of the last sequence read 
00173 void getLastSequenceName( string& seqName ) const
00174 {
00175   seqName = "UnnamedSequence";
00176 } // ~SequenceReaderStringBase::getLastSequenceName
00177 
00178   // Function Name: getBitsPerSymbol
00179   // Arguments: none
00180   // Returns:   int
00181   // Returns number of bits per symbol used in encoding
00182 virtual int getBitsPerSymbol ( void ) const
00183 {
00184   return pEncoder_->getBitsPerSymbol();
00185 } // ~SequenceReaderString::getBitsPerSymbol ( void ) const
00186 
00187   // Function Name: getSourceDataType
00188   // Arguments: none
00189   // Returns:   SourceDataType
00190   // Returns type of data being encoded (protein or DNA)
00191 virtual SourceDataType getSourceDataType( void ) const
00192 {
00193   return pEncoder_->getSourceDataType();
00194 }
00195 
00196 
00197 
00198 
00199 
00200   // Function Name: printName
00201   // Arguments: ostream& (out), SequenceNumber (in)
00202   // Returns:   void
00203   // Sends the name of the requested sequence to the output stream
00204 virtual bool printName( ostream& os, SequenceNumber seqNum )
00205 {
00206   os << "UnnamedSequence" << endl;
00207   return true;
00208 } // ~SequenceReaderStringBase::printName
00209 
00210 
00211 
00212   // Function Name: printSideInfo
00213   // Arguments: ostream& (out), SequenceNumber (in)
00214   // Returns:   void
00215   // Sends the side info for the requested sequence to the output stream
00216 virtual bool printSideInfo( ostream& os, SequenceNumber seqNum )
00217 {
00218   return true;
00219 } // ~SequenceReaderStringBase::printName
00220 
00221 
00222   // Function Name: printSource
00223   // Arguments: string& (out), SequenceNumber (in)
00224   // Returns:   void
00225   // Sends the source data (e.g. ASCII) for the requested sequence
00226   // to the output stream
00227 virtual bool printSource( ostream& os, SequenceNumber seqNum )
00228 {
00229   os << sequenceString_;
00230   return true;
00231 } // ~SequenceReaderStringBase::printName
00232 
00233 
00234   // Function Name: computeNumSequencesInFile
00235   // Arguments: void
00236   // Returns:   SequenceNumber
00237   // Returns the number of sequences in the string - always 1!!!
00238   SequenceNumber computeNumSequencesInFile( void ) { return 1; } 
00239 
00240 
00241   // PROTECTED MEMBER FUNCTIONS 
00242   // (visible to this class and derived classes only)
00243   protected:
00244 
00245   // PRIVATE MEMBER FUNCTIONS
00246   // (visible to instances of this class only)
00247   
00248   private:
00249 
00250   SequenceReaderStringBase& operator=(const SequenceReaderStringBase&);   
00251   // NOT IMPLEMENTED
00252 
00253   // PRIVATE MEMBER DATA
00254   private:
00255   const string sequenceString_;     
00256 //  ENCODER encoder_;
00257 SequenceEncoder* pEncoder_;
00258 
00259 }; // SequenceReaderStringBase
00260 
00261 //typedef SequenceReaderStringBase<SequenceEncoderDNA> 
00262 //SequenceReaderString;
00263 //typedef SequenceReaderStringBase<SequenceEncoderDNA> 
00264 //SequenceReaderStringDNA;
00265 //typedef SequenceReaderStringBase<SequenceEncoderProtein> 
00266 //SequenceReaderStringProtein;
00267 
00268 
00269 class SequenceReaderStringDNA : public SequenceReaderStringBase
00270 {
00271  public:
00272 SequenceReaderStringDNA
00273   ( const string& sequenceString, ostream& monitoringStream = cerr ) :
00274   SequenceReaderStringBase( sequenceString, 
00275                             new SequenceEncoderDNA(), 
00276                              monitoringStream ) {}
00277 };
00278 class SequenceReaderStringProtein : public SequenceReaderStringBase
00279 {
00280  public:
00281 SequenceReaderStringProtein
00282   ( const string& sequenceString, ostream& monitoringStream = cerr ) :
00283   SequenceReaderStringBase( sequenceString, 
00284                             new SequenceEncoderProtein(), 
00285                              monitoringStream ) {}
00286 };
00287 
00288 typedef SequenceReaderStringDNA SequenceReaderString;
00289 
00290 //class SequenceReaderStringProtein : public SequenceReaderStringBase
00291 //{
00292 // public:
00293 //SequenceReaderStringProtein
00294 //( const string& sequenceString,  ostream& monitoringStream = cerr );
00295 //};
00296 //
00297 
00298 
00299 
00300 // ### Function Declarations ###
00301 
00302 // Name:
00303 // Arguments:
00304 // TYPE  NAME  IN/OUT COMMENT
00305 // Returns: TYPE COMMENT
00306 
00307 // End of include guard:
00308 #endif
00309 
00310 // End of file SequenceReaderString.h
00311 
00312 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2