00001 /* Last edited: Jan 14 13:25 2002 (ac2) */ 00002 00003 // ####################################################################### 00004 00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm 00006 // Version 3.2, released 1st March 2004 00007 // Copyright (c) Genome Research 2002 00008 00009 // SSAHA is free software; you can redistribute it and/or modify 00010 // it under the terms of version 2 of the GNU General Public Licence 00011 // as published by the Free Software Foundation. 00012 00013 // This program is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 // GNU General Public Licence for more details. 00017 00018 // You should have received a copy of the GNU General Public Licence 00019 // along with this program; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt 00022 00023 // ####################################################################### 00024 00025 // Module Name : SequenceReaderLocal 00026 // File Name : SequenceReaderLocal.h 00027 // Language : C++ 00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk) 00029 00030 // Include guard: 00031 #ifndef INCLUDED_SequenceReaderLocal 00032 #define INCLUDED_SequenceReaderLocal 00033 00034 // Description: 00035 00036 // Includes: 00037 00038 #include "SequenceReader.h" 00039 #include "GlobalDefinitions.h" 00040 #include <vector> 00041 // NB it is good practise for #include statements in header files to be 00042 // replaced by forward declarations if at all possible 00043 00044 // ### Class Declarations ### 00045 00046 // Class Name : SequenceReaderLocal 00047 // Description: Classes such as SequenceReaderFasta essentially scan through a 00048 // file sequentially. 
This is not always a problem, but may be slow if random 00049 // access to sequences is required. SequenceReaderLocal takes sequence data 00050 // from another instance of SequenceReader and holds it in local memory 00051 // for fast random access. Downside is that a) more memory is used and 00052 // b) the number of base pairs per word is fixed at construction time (the 00053 // latter could be remedied but I'm not convinced it's worth it) 00054 class SequenceReaderLocal : public SequenceReader 00055 { 00056 00057 00058 // PUBLIC MEMBER FUNCTIONS 00059 public: 00060 typedef pair< WordSequence, std::string> SequenceInfo; 00061 00062 // Constructors and Destructors 00063 00064 // Function Name: Constructor 00065 // Arguments: SequenceReader&, int, ostream& 00066 // Takes the data from seqFile and places it in seqData, seqBasesInLast 00067 // and seqNames 00068 SequenceReaderLocal 00069 ( SequenceReader& seqFile, 00070 int wordLength, 00071 ostream& monitoringStream = cerr ); 00072 00073 // Function Name: Constructor 00074 // Creates an empty SequenceReaderLocal 00075 SequenceReaderLocal 00076 ( int wordLength, int bitsPerSymbol, ostream& monitoringStream = cerr ); 00077 00078 // Function Name: Copy constructor 00079 // Arguments: 00080 // NB This is potentially slow. 
00081 SequenceReaderLocal( const SequenceReaderLocal& rhs ); 00082 00083 00084 // Function Name: Destructor 00085 // Arguments: 00086 ~SequenceReaderLocal(); 00087 // (NB destructor should be virtual if class is to be derived from) 00088 00089 // Manipulator Functions 00090 virtual SequenceReader* clone( void ) 00091 { return new SequenceReaderLocal( *this ); } 00092 00093 // Function Name: 00094 // Arguments: 00095 // TYPE NAME IN/OUT COMMENT 00096 // Returns: TYPE COMMENT 00097 00098 // Function Name: changeMode 00099 // Arguments: const SequenceReaderMode& 00100 // Makes a copy of mode and uses it to handle mismatch character reads 00101 // Does nothing here because any character reading necessary will have 00102 // been done by the SequenceReader this class is constructed from 00103 virtual void changeMode( SequenceReaderMode* pMode ) {} 00104 00105 // Accessor Functions 00106 // (NB all accessor functions should be 'const') 00107 00108 // Function Name: rewind 00109 // Arguments: void 00110 // Returns: void 00111 // Rewind to the start of the data file, so that getNextSequence will 00112 // return the first sequence in the file 00113 virtual void rewind( void ) 00114 { 00115 lastSequenceNumber_ = 0; 00116 // don't need to do anything else, because we're set up for random access 00117 } // ~rewind 00118 00119 // Function Name: findSequence 00120 // Arguments: SequenceNumber (in) 00121 // Returns: void 00122 // Winds the input file stream to the start of sequence number seqNum. 00123 // Returns false if seqNum exceeds the number of sequences in 00124 // the file. 00125 virtual bool findSequence( SequenceNumber seqNum ); 00126 00127 // Function Name: getNextSequence 00128 // Arguments: WordSequence& (out), int (in) 00129 // Returns: int 00130 // Read the next set of sequence information from the file and parse it 00131 // into WordSequence format. 
Returns -1 if there has been a problem with 00132 // reading the sequence, else returns the number of valid base pairs 00133 // contained within the final word of the sequence. 00134 virtual int getNextSequence( WordSequence& nextSeq, int wordLength ); 00135 00136 // Function Name: getSequence 00137 // Arguments: WordSequence& (out), SequenceNumber (in), int (in) 00138 // Returns: bool 00139 // Read the sequenceNumber-th set of sequence information from the file and 00140 // parse it into WordSequence format 00141 virtual int getSequence 00142 ( WordSequence& nextSeq, SequenceNumber sequenceNumber, int wordLength ); 00143 00144 // Function Name: getLastSequenceName 00145 // Arguments: string& (out) 00146 // Returns: void 00147 // Fills the string with the name of the last sequence read 00148 virtual void getLastSequenceName( string& seqName ) const; 00149 00150 // Function Name: getBitsPerSymbol 00151 // Arguments: none 00152 // Returns: int 00153 // Returns number of bits per symbol used in encoding 00154 virtual int getBitsPerSymbol ( void ) const 00155 { 00156 return bitsPerSymbol_; 00157 } 00158 00159 // Function Name: getSourceDataType 00160 // Arguments: none 00161 // Returns: SourceDataType 00162 // Returns type of data being encoded (protein or DNA) 00163 virtual SourceDataType getSourceDataType( void ) const 00164 { 00165 return sourceData_; 00166 } 00167 00168 // Function Name: printName 00169 // Arguments: string& (out), SequenceNumber (in) 00170 // Returns: void 00171 // Fills a string with the name of the requested sequence 00172 virtual bool printName( ostream& os, SequenceNumber seqNum ); 00173 00174 // Function Name: printSideInfo 00175 // Arguments: string& (out), SequenceNumber (in) 00176 // Returns: void 00177 // Fills a string with the name of the requested sequence 00178 virtual bool printSideInfo( ostream& os, SequenceNumber seqNum ); 00179 00180 // Function Name: printSource 00181 // Arguments: string& (out), SequenceNumber (in) 00182 // 
Returns: void 00183 // Fills a string with the name of the requested sequence 00184 virtual bool printSource( ostream& os, SequenceNumber seqNum ); 00185 00186 SequenceInfo& back( void ) { return seqData_.back(); } 00187 void push_back( void ) { seqData_.push_back(SequenceInfo()); } 00188 void pop_back( void ) { seqData_.pop_back(); } 00189 const SequenceInfo& operator[]( vector<SequenceInfo>::size_type i ) 00190 { 00191 if ( i >= seqData_.size() ) 00192 throw SSAHAException 00193 ("Tried to access non-existent sequence in SequenceReaderLocal"); 00194 else return seqData_[i]; 00195 } 00196 00197 00198 00199 // PROTECTED MEMBER FUNCTIONS 00200 // (visible to this class and derived classes only) 00201 protected: 00202 00203 // Function Name: computeNumSequencesInFile 00204 // Arguments: void 00205 // Returns: SequenceNumber 00206 // Returns the number of sequences in the file - called by 00207 // getNumSequencesInFile. This is a null function because for 00208 // SequenceReaderLocal this is determined as part of the construction 00209 // process. 00210 virtual SequenceNumber computeNumSequencesInFile( void ) 00211 { return seqData_.size(); } 00212 00213 // PRIVATE MEMBER FUNCTIONS 00214 // (visible to instances of this class only) 00215 00216 private: 00217 SequenceReaderLocal& operator=(const SequenceReaderLocal&); // NOT IMPLEMENTED 00218 00219 // PROTECTED DATA: 00220 // (visible to instances of this class only) 00221 protected: 00222 vector<SequenceInfo> seqData_; 00223 00224 int wordLength_; 00225 int bitsPerSymbol_; 00226 SourceDataType sourceData_; 00227 00228 // PRIVATE MEMBER DATA 00229 private: 00230 00231 }; // ~class SequenceReaderLocal 00232 00233 00234 00235 // ### Function Declarations ### 00236 00237 // Name: 00238 // Arguments: 00239 // TYPE NAME IN/OUT COMMENT 00240 // Returns: TYPE COMMENT 00241 00242 // End of include guard: 00243 #endif 00244 00245 // End of file SequenceReaderLocal.h 00246 00247 00248 00249
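// 1.5.2  (footer left over from the generated source listing, presumably the generator's version number; not part of SequenceReaderLocal.h)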