00001 /* Last edited: Jan 14 13:32 2002 (ac2) */ 00002 00003 // ####################################################################### 00004 00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm 00006 // Version 3.2, released 1st March 2004 00007 // Copyright (c) Genome Research 2002 00008 00009 // SSAHA is free software; you can redistribute it and/or modify 00010 // it under the terms of version 2 of the GNU General Public Licence 00011 // as published by the Free Software Foundation. 00012 00013 // This program is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 // GNU General Public Licence for more details. 00017 00018 // You should have received a copy of the GNU General Public Licence 00019 // along with this program; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt 00022 00023 // ####################################################################### 00024 00025 // Module Name : SequenceReaderString 00026 // File Name : SequenceReaderString.h 00027 // Language : C++ 00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk) 00029 00030 // Include guard: 00031 #ifndef INCLUDED_SequenceReaderString 00032 #define INCLUDED_SequenceReaderString 00033 00034 // Description: 00035 00036 // Includes: 00037 00038 #include "SequenceReader.h" 00039 #include <string> 00040 // NB it is good practise for #include statements in header files to be 00041 // replaced by forward declarations if at all possible 00042 class WordSequence; 00043 //class SequenceEncoder; 00044 #include "SequenceEncoder.h" 00045 // ### Class Declarations ### 00046 00047 00048 // Class Name : SequenceReaderStringBase 00049 // Description: 00050 00051 class SequenceReaderStringBase : public SequenceReader 00052 { 00053 00054 // PUBLIC MEMBER FUNCTIONS 00055 public: 00056 00057 // Constructors and Destructors 00058 00059 // Function Name: 00060 // Arguments: 00061 // TYPE NAME IN/OUT COMMENT 00062 // Returns: TYPE COMMENT 00063 SequenceReaderStringBase 00064 ( const string& sequenceString, 00065 // NB SequenceReaderStringBase takes ownership of *pEncoder 00066 SequenceEncoder* pEncoder, 00067 ostream& monitoringStream = cerr ) : 00068 sequenceString_( sequenceString ), 00069 pEncoder_( pEncoder ), 00070 SequenceReader( monitoringStream ) 00071 { 00072 monitoringStream_ << "constructing SequenceReaderStringBase" << endl; 00073 } // constructor 00074 00075 00076 // Function Name: 00077 // Arguments: 00078 // TYPE NAME IN/OUT COMMENT 00079 // Returns: TYPE COMMENT 00080 SequenceReaderStringBase( const SequenceReaderStringBase& rhs ) : 00081 sequenceString_( rhs.sequenceString_ ), 00082 pEncoder_( rhs.pEncoder_->clone() ), 00083 SequenceReader( rhs.monitoringStream_ ) 00084 { 00085 monitoringStream_ << "copy constructing SequenceReaderStringBase" << endl; 00086 } // copy constructor 00087 00088 00089 00090 // Function Name: 00091 // Arguments: 00092 // TYPE NAME IN/OUT COMMENT 00093 // Returns: TYPE COMMENT 00094 virtual ~SequenceReaderStringBase() 00095 { 00096 monitoringStream_ << "destructing SequenceReaderStringBase" << endl; 00097 delete pEncoder_; 00098 } // destructor 00099 00100 // (NB destructor should be virtual if class is to be derived from) 00101 00102 // Manipulator Functions 00103 virtual SequenceReader* clone( void ) 00104 { return new SequenceReaderStringBase( *this ); } 00105 00106 // Function Name: 00107 // Arguments: 00108 // TYPE NAME IN/OUT COMMENT 00109 // Returns: TYPE COMMENT 00110 00111 // Function Name: changeMode 00112 // Arguments: const SequenceReaderMode& 00113 // Makes a copy of mode and uses it to handle mismatch character reads 00114 virtual void changeMode( SequenceReaderMode* pMode ) 00115 { 00116 pEncoder_->changeMode( pMode ); 00117 // pEncoder_->changeMode( pMode ); 00118 } 00119 00120 00121 00122 00123 00124 // Accessor Functions 00125 // (NB all accessor functions should be 'const') 00126 00127 // Function Name: rewind 00128 // Arguments: void 00129 // Returns: void 00130 // Rewind to the start of the data file, so that getNextSequence will 00131 // return the first sequence in the file 00132 void rewind( void ) 00133 { lastSequenceNumber_ = 0; } // ~rewind 00134 00135 // Function Name: findSequence 00136 // Arguments: SequenceNumber (in) 00137 // Returns: void 00138 // Winds the input file stream to the start of sequence number seqNum. 00139 // Returns false if seqNum exceeds the number of sequences in 00140 // the file. 00141 virtual bool findSequence( SequenceNumber seqNum ) 00142 { 00143 return (seqNum == 1); 00144 } 00145 00146 00147 // Function Name: getNextSequence 00148 // Arguments: WordSequence& (out), int (in) 00149 // Returns: int 00150 // Read the set of sequence information from the string and parse it 00151 // into WordSequence format. Returns -1 if a problem, else the number of 00152 // valid base pairs in the final word of the sequence 00153 virtual int getNextSequence( WordSequence& nextSeq, int wordLength ); 00154 00155 00156 00157 // Function Name: getSequence 00158 // Arguments: WordSequence& (out), SequenceNumber (in), int (in) 00159 // Returns: int 00160 // Read the sequenceNumber-th set of sequence information from the file and 00161 // parse it into WordSequence format 00162 virtual int getSequence 00163 ( WordSequence& nextSeq, SequenceNumber sequenceNumber, int wordLength ) 00164 { 00165 // only one sequence in a string, so just ignore sequenceNumber 00166 return getNextSequence( nextSeq, wordLength ); 00167 } // ~getSequence 00168 00169 // Function Name: getLastSequenceName 00170 // Arguments: string& (out) 00171 // Returns: void 00172 // Fills the string with the name of the last sequence read 00173 void getLastSequenceName( string& seqName ) const 00174 { 00175 seqName = "UnnamedSequence"; 00176 } // ~SequenceReaderStringBase::getLastSequenceName 00177 00178 // Function Name: getBitsPerSymbol 00179 // Arguments: none 00180 // Returns: int 00181 // Returns number of bits per symbol used in encoding 00182 virtual int getBitsPerSymbol ( void ) const 00183 { 00184 return pEncoder_->getBitsPerSymbol(); 00185 } // ~SequenceReaderString::getBitsPerSymbol ( void ) const 00186 00187 // Function Name: getSourceDataType 00188 // Arguments: none 00189 // Returns: SourceDataType 00190 // Returns type of data being encoded (protein or DNA) 00191 virtual SourceDataType getSourceDataType( void ) const 00192 { 00193 return pEncoder_->getSourceDataType(); 00194 } 00195 00196 00197 00198 00199 00200 // Function Name: printName 00201 // Arguments: ostream& (out), SequenceNumber (in) 00202 // Returns: void 00203 // Sends the name of the requested sequence to the output stream 00204 virtual bool printName( ostream& os, SequenceNumber seqNum ) 00205 { 00206 os << "UnnamedSequence" << endl; 00207 return true; 00208 } // ~SequenceReaderStringBase::printName 00209 00210 00211 00212 // Function Name: printSideInfo 00213 // Arguments: ostream& (out), SequenceNumber (in) 00214 // Returns: void 00215 // Sends the side info for the requested sequence to the output stream 00216 virtual bool printSideInfo( ostream& os, SequenceNumber seqNum ) 00217 { 00218 return true; 00219 } // ~SequenceReaderStringBase::printName 00220 00221 00222 // Function Name: printSource 00223 // Arguments: string& (out), SequenceNumber (in) 00224 // Returns: void 00225 // Sends the source data (e.g. ASCII) for the requested sequence 00226 // to the output stream 00227 virtual bool printSource( ostream& os, SequenceNumber seqNum ) 00228 { 00229 os << sequenceString_; 00230 return true; 00231 } // ~SequenceReaderStringBase::printName 00232 00233 00234 // Function Name: computeNumSequencesInFile 00235 // Arguments: void 00236 // Returns: SequenceNumber 00237 // Returns the number of sequences in the string - always 1!!! 00238 SequenceNumber computeNumSequencesInFile( void ) { return 1; } 00239 00240 00241 // PROTECTED MEMBER FUNCTIONS 00242 // (visible to this class and derived classes only) 00243 protected: 00244 00245 // PRIVATE MEMBER FUNCTIONS 00246 // (visible to instances of this class only) 00247 00248 private: 00249 00250 SequenceReaderStringBase& operator=(const SequenceReaderStringBase&); 00251 // NOT IMPLEMENTED 00252 00253 // PRIVATE MEMBER DATA 00254 private: 00255 const string sequenceString_; 00256 // ENCODER encoder_; 00257 SequenceEncoder* pEncoder_; 00258 00259 }; // SequenceReaderStringBase 00260 00261 //typedef SequenceReaderStringBase<SequenceEncoderDNA> 00262 //SequenceReaderString; 00263 //typedef SequenceReaderStringBase<SequenceEncoderDNA> 00264 //SequenceReaderStringDNA; 00265 //typedef SequenceReaderStringBase<SequenceEncoderProtein> 00266 //SequenceReaderStringProtein; 00267 00268 00269 class SequenceReaderStringDNA : public SequenceReaderStringBase 00270 { 00271 public: 00272 SequenceReaderStringDNA 00273 ( const string& sequenceString, ostream& monitoringStream = cerr ) : 00274 SequenceReaderStringBase( sequenceString, 00275 new SequenceEncoderDNA(), 00276 monitoringStream ) {} 00277 }; 00278 class SequenceReaderStringProtein : public SequenceReaderStringBase 00279 { 00280 public: 00281 SequenceReaderStringProtein 00282 ( const string& sequenceString, ostream& monitoringStream = cerr ) : 00283 SequenceReaderStringBase( sequenceString, 00284 new SequenceEncoderProtein(), 00285 monitoringStream ) {} 00286 }; 00287 00288 typedef SequenceReaderStringDNA SequenceReaderString; 00289 00290 //class SequenceReaderStringProtein : public SequenceReaderStringBase 00291 //{ 00292 // public: 00293 //SequenceReaderStringProtein 00294 //( const string& sequenceString, ostream& monitoringStream = cerr ); 00295 //}; 00296 // 00297 00298 00299 00300 // ### Function Declarations ### 00301 00302 // Name: 00303 // Arguments: 00304 // TYPE NAME IN/OUT COMMENT 00305 // Returns: TYPE COMMENT 00306 00307 // End of include guard: 00308 #endif 00309 00310 // End of file SequenceReaderString.h 00311 00312
1.5.2