SequenceReader/SequenceReaderLocal.h

Go to the documentation of this file.
00001 /*  Last edited: Jan 14 13:25 2002 (ac2) */
00002 
00003 // #######################################################################
00004 
00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00006 // Version 3.2, released 1st March 2004
00007 // Copyright (c) Genome Research 2002
00008 
00009 // SSAHA is free software; you can redistribute it and/or modify 
00010 // it under the terms of version 2 of the GNU General Public Licence
00011 // as published by the Free Software Foundation.
00012  
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public Licence for more details.
00017  
00018 // You should have received a copy of the GNU General Public Licence
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00022 
00023 // #######################################################################
00024 
00025 // Module Name  : SequenceReaderLocal
00026 // File Name    : SequenceReaderLocal.h
00027 // Language     : C++
00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00029 
00030 // Include guard:
00031 #ifndef INCLUDED_SequenceReaderLocal
00032 #define INCLUDED_SequenceReaderLocal
00033 
00034 // Description:
00035 
00036 // Includes:
00037 
00038 #include "SequenceReader.h"
00039 #include "GlobalDefinitions.h"
00040 #include <vector>
00041 // NB it is good practice for #include statements in header files to be
00042 // replaced by forward declarations if at all possible
00043 
00044 // ### Class Declarations ###
00045 
00046 // Class Name : SequenceReaderLocal
00047 // Description: Classes such as SequenceReaderFasta essentially scan through a
00048 // file sequentially. This is not always a problem, but may be slow if random
00049 // access to sequences is required. SequenceReaderLocal takes sequence data
00050 // from another instance of SequenceReader and holds it in local memory
00051 // for fast random access. Downside is that a) more memory is used and
00052 // b) the number of base pairs per word is fixed at construction time (the
00053 // latter could be remedied but I'm not convinced it's worth it)
00054 class SequenceReaderLocal : public SequenceReader
00055 {
00056 
00057 
00058   // PUBLIC MEMBER FUNCTIONS
00059   public:
00060   typedef pair< WordSequence, std::string> SequenceInfo; // element type of seqData_
00061 
00062   // Constructors and Destructors
00063 
00064   // Function Name: Constructor
00065   // Arguments: SequenceReader&, int, ostream& (defaults to cerr)
00066   // Takes the data from seqFile and holds it in local memory (presumably
00067   // in seqData_; the implementation is not in this header - TODO confirm)
00068   SequenceReaderLocal
00069   ( SequenceReader& seqFile, 
00070     int wordLength, 
00071     ostream& monitoringStream = cerr );
00072 
00073   // Function Name: Constructor
00074   // Arguments: int, int, ostream& (defaults to cerr). Creates an empty SequenceReaderLocal
00075   SequenceReaderLocal
00076   ( int wordLength, int bitsPerSymbol, ostream& monitoringStream = cerr );
00077 
00078   // Function Name: Copy constructor
00079   // Arguments: const SequenceReaderLocal&
00080   // NB This is potentially slow. Used by clone().
00081   SequenceReaderLocal( const SequenceReaderLocal& rhs );
00082 
00083 
00084   // Function Name: Destructor
00085   // Arguments: none
00086   ~SequenceReaderLocal(); 
00087   // (NB destructor should be virtual if class is to be derived from)
00088 
00089   // Manipulator Functions
00090   virtual SequenceReader* clone( void ) 
00091   { return new SequenceReaderLocal( *this ); }
00092 
00093   // Function Name: clone (defined just above)
00094   // Arguments: void
00095   // Returns: SequenceReader*, a copy of *this created with new via the
00096   // copy constructor (who deletes it is not specified here)
00097 
00098   // Function Name: changeMode
00099   // Arguments: SequenceReaderMode* (ignored by this class)
00100   // Interface hook for setting how mismatch character reads are handled.
00101   // Does nothing here because any character reading necessary will have 
00102   // been done by the SequenceReader this class is constructed from 
00103   virtual void changeMode( SequenceReaderMode* pMode ) {}
00104 
00105   // Accessor Functions
00106   // (NB all accessor functions should be 'const')
00107 
00108   // Function Name: rewind
00109   // Arguments: void
00110   // Returns:   void
00111   // Rewind to the start of the data file, so that getNextSequence will
00112   // return the first sequence in the file
00113   virtual void rewind( void ) 
00114   {
00115     lastSequenceNumber_ = 0; // not declared in this class (presumably inherited)
00116     // don't need to do anything else, because we're set up for random access
00117   } // ~rewind 
00118 
00119   // Function Name: findSequence
00120   // Arguments: SequenceNumber (in)
00121   // Returns:   bool
00122   // Winds the input file stream to the start of sequence number seqNum. 
00123   // Returns false if seqNum exceeds the number of sequences in
00124   // the file.
00125   virtual bool findSequence( SequenceNumber seqNum ); 
00126 
00127   // Function Name: getNextSequence
00128   // Arguments: WordSequence& (out), int (in)
00129   // Returns:   int
00130   // Read the next set of sequence information from the file and parse it
00131   // into WordSequence format. Returns -1 if there has been a problem with
00132   // reading the sequence, else returns the number of valid base pairs 
00133   // contained within the final word of the sequence.
00134   virtual int getNextSequence( WordSequence& nextSeq, int wordLength );
00135 
00136   // Function Name: getSequence
00137   // Arguments: WordSequence& (out), SequenceNumber (in), int (in)
00138   // Returns:   int (meaning not stated here; presumably as getNextSequence)
00139   // Read the sequenceNumber-th set of sequence information from the file and 
00140   // parse it into WordSequence format
00141   virtual int getSequence
00142   ( WordSequence& nextSeq, SequenceNumber sequenceNumber, int wordLength );
00143 
00144   // Function Name: getLastSequenceName
00145   // Arguments: string& (out)
00146   // Returns:   void
00147   // Fills the string with the name of the last sequence read 
00148   virtual void getLastSequenceName( string& seqName ) const;
00149 
00150   // Function Name: getBitsPerSymbol
00151   // Arguments: none
00152   // Returns:   int
00153   // Returns number of bits per symbol used in encoding
00154   virtual int getBitsPerSymbol ( void ) const 
00155   {
00156     return bitsPerSymbol_;
00157   }
00158 
00159   // Function Name: getSourceDataType
00160   // Arguments: none
00161   // Returns:   SourceDataType
00162   // Returns type of data being encoded (protein or DNA)
00163   virtual SourceDataType getSourceDataType( void ) const
00164   {
00165     return sourceData_;
00166   }
00167 
00168   // Function Name: printName
00169   // Arguments: ostream& (out), SequenceNumber (in)
00170   // Returns:   bool (meaning not stated here - TODO confirm)
00171   // Presumably writes the name of the requested sequence to os
00172   virtual bool printName( ostream& os, SequenceNumber seqNum );
00173 
00174   // Function Name: printSideInfo
00175   // Arguments: ostream& (out), SequenceNumber (in)
00176   // Returns:   bool (meaning not stated here - TODO confirm)
00177   // Presumably writes the side info of the requested sequence to os
00178   virtual bool printSideInfo( ostream& os, SequenceNumber seqNum );
00179 
00180   // Function Name: printSource
00181   // Arguments: ostream& (out), SequenceNumber (in)
00182   // Returns:   bool (meaning not stated here - TODO confirm)
00183   // Presumably writes the source of the requested sequence to os
00184   virtual bool printSource( ostream& os, SequenceNumber seqNum );
00185   // Direct access to the locally held sequence data (seqData_):
00186   SequenceInfo& back( void ) { return seqData_.back(); } // last entry, modifiable
00187   void push_back( void )     { seqData_.push_back(SequenceInfo());   } // append an empty entry
00188   void pop_back( void )      { seqData_.pop_back();    } // remove the last entry
00189   const SequenceInfo& operator[]( vector<SequenceInfo>::size_type i ) 
00190   { // read-only access to entry i; throws SSAHAException if i is out of range
00191     if ( i >= seqData_.size() )
00192       throw SSAHAException
00193         ("Tried to access non-existent sequence in SequenceReaderLocal"); 
00194     else return seqData_[i]; 
00195   }
00196   
00197 
00198 
00199   // PROTECTED MEMBER FUNCTIONS 
00200   // (visible to this class and derived classes only)
00201   protected:
00202 
00203   // Function Name: computeNumSequencesInFile
00204   // Arguments: void
00205   // Returns:   SequenceNumber
00206   // Returns the number of sequences in the file - called by 
00207   // getNumSequencesInFile. No file is scanned here: for 
00208   // SequenceReaderLocal it is simply the number of entries currently 
00209   // held in seqData_.
00210   virtual SequenceNumber computeNumSequencesInFile( void )
00211   { return seqData_.size(); }
00212 
00213   // PRIVATE MEMBER FUNCTIONS
00214   // (visible to instances of this class only)
00215   // Assignment is declared private and left undefined, so it cannot be used
00216   private:
00217   SequenceReaderLocal& operator=(const SequenceReaderLocal&);   // NOT IMPLEMENTED
00218 
00219   // PROTECTED DATA:
00220   // (visible to this class and derived classes only)
00221   protected:
00222   vector<SequenceInfo> seqData_; // one SequenceInfo entry per stored sequence
00223 
00224   int wordLength_;            // presumably the word length given at construction
00225   int bitsPerSymbol_;         // returned by getBitsPerSymbol
00226   SourceDataType sourceData_; // returned by getSourceDataType
00227 
00228   // PRIVATE MEMBER DATA (none)
00229   private:
00230 
00231 }; // ~class SequenceReaderLocal
00232 
00233 
00234 
00235 // ### Function Declarations ###
00236 
00237 // Name:
00238 // Arguments:
00239 // TYPE  NAME  IN/OUT COMMENT
00240 // Returns: TYPE COMMENT
00241 
00242 // End of include guard:
00243 #endif
00244 
00245 // End of file SequenceReaderLocal.h
00246 
00247 
00248 
00249 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2