HashTable/HashTableTranslated.h

Go to the documentation of this file.
00001 /*  Last edited: May 29 11:42 2002 (ac2) */
00002 
00003 // #######################################################################
00004 
00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00006 // Version 3.2, released 1st March 2004
00007 // Copyright (c) Genome Research 2002
00008 
00009 // SSAHA is free software; you can redistribute it and/or modify 
00010 // it under the terms of version 2 of the GNU General Public Licence
00011 // as published by the Free Software Foundation.
00012  
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public Licence for more details.
00017  
00018 // You should have received a copy of the GNU General Public Licence
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00022 
00023 // #######################################################################
00024 
00025 // Module Name  : HashTableTranslated
00026 // File Name    : HashTableTranslated.h
00027 // Language     : C++
00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00029 
00030 // Include guard:
00031 #ifndef INCLUDED_HashTableTranslated
00032 #define INCLUDED_HashTableTranslated
00033 
00034 // Description: Declares HashTableComponent, HashTablePackedProtein and HashTableTranslated.
00035 
00036 // Includes:
00037 #include "HashTablePacked.h"
00038 //#include "SequenceReaderCodon.h"
00039 #include "SequenceEncoder.h"
00040 
00041 // NB it is good practice for #include statements in header files to be
00042 // replaced by forward declarations if at all possible
00043 
00044 // ### Class Declarations ###
00045 
00046 
00047 // Class Name :
00048 // Description: 
00049 
00050 
00051 // Class Name : HashTableComponent
00052 // Description: A HashTableTranslated contains two of these (its
00053 // hashFwd_ and hashRev_ members). Each contains the hit information for
00054 // all three reading frames of one of the two strand directions.
00055 class HashTableComponent : public HashTablePacked
00056 {
00057  public:
00058   // Constructor: name has no default here; either allocator may be omitted, in which case defaultHitListAllocator / defaultArrayAllocator is used.
00059   HashTableComponent( ostream& monitoringStream, string name,
00060                    Allocator<PositionPacked>& hitListAllocator 
00061                    = defaultHitListAllocator,
00062                    Allocator<PositionInHitList>& arrayAllocator 
00063                    = defaultArrayAllocator );
00064   // Virtual; only declared here. NOTE(review): presumably fills hitListFwd from hits - confirm against the definition.
00065   virtual void convertHits
00066   ( PackedHitStore& hits, HitList& hitListFwd );
00067   // Stores qf in queryFrame_.
00068   void setQueryFrame( int qf ) { queryFrame_ = qf; }
00069   //  void setSubjectFrame( int sf ) { subjectFrame_ = sf; }
00070 
00071  private:
00072   int queryFrame_; // written by setQueryFrame(); no in-class initializer
00073   //  int subjectFrame_;
00074 
00075 
00076 }; // ~class HashTableComponent
00077 
00078 // Class Name : HashTablePackedProtein 
00079 // Description: Operates in two modes, to allow both protein and
00080 // DNA queries to be matched. A DNA query (assumed to be a WordSequence
00081 // with gMaxBasesPerWord bases per word) is 3 way translated and
00082 // each translation is matched. The hit information is converted to
00083 // the DNA frame. This means the protein positions have to be 
00084 // converted to the protein frame again at the generateMatches stage.
00085 class HashTablePackedProtein : public HashTablePacked
00086 {
00087  public:
00088   typedef void (HashTablePackedProtein::* MatchSequencePointer)
00089     (WordSequence&, HitList&); // member-function pointer type stored in pMatchSequence_
00090   // Constructor: every argument except monitoringStream has a default value.
00091   HashTablePackedProtein( ostream& monitoringStream, string name = "",
00092                    Allocator<PositionPacked>& hitListAllocator 
00093                    = defaultHitListAllocator,
00094                    Allocator<PositionInHitList>& arrayAllocator 
00095                    = defaultArrayAllocator );
00096 
00097   // Virtual; only declared here (the body is not part of this header).
00098   virtual void convertHits
00099   ( PackedHitStore& packedHits, HitList& hitListFwd );
00100   // Forwards to whichever member function pMatchSequence_ currently selects.
00101   virtual void matchSequence
00102     ( WordSequence& seq, HitList& hitListFwd )
00103   {
00104     (this->*pMatchSequence_)(seq, hitListFwd);
00105   } // ~matchSequence
00106   // The two routines that pMatchSequence_ can be pointed at by the setters below.
00107   void matchSequenceProtein( WordSequence& seq, HitList& hitListFwd );
00108   void matchSequenceTranslatedDNA( WordSequence& seq, HitList& hitListFwd );
00109   // Protein-query mode: dispatch to matchSequenceProtein, queryMult_ = 1, queryFrame_ = 0.
00110   void setQueryProtein( void ) 
00111   { 
00112     pMatchSequence_ = &HashTablePackedProtein::matchSequenceProtein;
00113     queryMult_  = 1;
00114     queryFrame_ = 0;
00115   } 
00116   void setQueryTranslatedDNA( void ) 
00117   { // Translated-DNA mode: dispatch to matchSequenceTranslatedDNA, queryMult_ = gNumReadingFrames, queryFrame_ = 0.
00118     pMatchSequence_ = &HashTablePackedProtein::matchSequenceTranslatedDNA;
00119     queryMult_  = gNumReadingFrames;
00120     queryFrame_ = 0;
00121   }
00122 
00123   private:
00124   CodonList codons_;
00125   SequenceEncoderCodon codonEncoder_;
00126 
00127   int queryFrame_; // reset to 0 by both mode setters
00128   int queryMult_;  // 1 in protein mode, gNumReadingFrames in translated-DNA mode
00129   // NOTE(review): no initial value for pMatchSequence_ is visible in this header - confirm the constructor selects a mode before matchSequence() is called.
00130   MatchSequencePointer pMatchSequence_;
00131 
00132 
00133 
00134 
00135 }; // ~HashTablePackedProtein
00136 
00137 // Class Name : HashTableTranslated
00138 // Description: Stores the hit information for the 6-way translation of 
00139 // a set of DNA sequences. Then allows either protein or 6-way translated
00140 // DNA to be run against them.
00141 class HashTableTranslated : public HashTableGeneric
00142 {
00143 
00144   // PUBLIC MEMBER FUNCTIONS
00145   public:
00146   typedef void (HashTableTranslated::* MatchSequencePointer)
00147     (WordSequence&, HitList&); // member-function pointer type stored in pMatchSequence_
00148   // Compile-time constant: gMaxBasesPerWord - 1.
00149   enum { eDNAWordSizeForHashing = gMaxBasesPerWord-1 };
00150 
00151   // Constructors and Destructors
00152 
00153   // Function Name: HashTableTranslated (constructor)
00154   // Arguments: all four are optional
00155   // ostream& monitoringStream (default cerr), string name (default ""),
00156   // hitListAllocator / arrayAllocator (default to the HashTablePacked defaults)
00157   HashTableTranslated( ostream& monitoringStream=cerr,
00158                        string name="",
00159                        Allocator<PositionPacked>& hitListAllocator
00160                        = HashTablePacked::defaultHitListAllocator,
00161                        Allocator<PositionInHitList>& arrayAllocator
00162                        = HashTablePacked::defaultArrayAllocator );
00163 
00164 
00165   // Function Name: ~HashTableTranslated (destructor)
00166   // Arguments: none
00167   // Virtual, with an empty body defined inline.
00168   // Returns: nothing
00169   virtual ~HashTableTranslated() {}
00170   // (NB destructor should be virtual if class is to be derived from)
00171 
00172   // Manipulator Functions
00173 
00174   // Function Name:
00175   // Arguments:
00176   // TYPE  NAME  IN/OUT COMMENT
00177   // Returns: TYPE COMMENT
00178 
00179   // Function Name: createHashTable
00180   // Arguments: SequenceReader& (in), int (in), int (in)
00181   // Reads sequence information from an instance of SequenceReader and
00182   // uses it to create a hash table
00183   //  virtual void createHashTable
00184   //  ( SequenceReader& sequenceReader, int wordLength, int maxNumHits, 
00185   //   int stepLength = 0 );
00186 
00187   // Load/save entry points; only declared here (bodies are not in this header).
00188   virtual void loadHashTable( SourceReaderIndex* pSourceReader=NULL );
00189   virtual void saveHashTable( void );
00190 
00191 
00192   // Virtual steps declared below have no bodies in this header. NOTE(review): presumably the table-construction stages - confirm call order in the implementation.
00193   virtual void setupPointerArray( void );
00194   virtual int countWordsAndGetNames
00195     ( SequenceReader& sequenceReader, SequenceAdapter* seq );
00196 
00197   virtual void computePointerArray( void );
00198   virtual void setupHitList( void );
00199   virtual void hashAllWords
00200     ( SequenceReader& sequenceReader, SequenceAdapter* seq, int numSeqs );
00201   virtual void cleanupTempData( void );
00202 
00203   virtual void hashWords
00204     ( SequenceAdapter& thisSeq, SequenceNumber seqNum );
00205   virtual void countWords( SequenceAdapter& thisSeq );
00206   // Forwards to the member function currently held in pMatchSequence_.
00207   virtual void matchSequence
00208     ( WordSequence& seq, HitList& hitListFwd )
00209   {
00210     (this->*pMatchSequence_)(seq, hitListFwd);
00211   }
00212   // The two routines that pMatchSequence_ can be pointed at (see setQueryProtein / setQueryTranslatedDNA).
00213   void matchSequenceProtein( WordSequence& seq, HitList& hitListFwd );
00214   void matchSequenceTranslatedDNA( WordSequence& seq, HitList& hitListFwd );
00215 
00216 
00217   virtual void setNumRepeats( int nr);
00218   virtual void setSubstituteThreshold( int ns );
00219 
00220   virtual char* getHitListStart( void ) const;
00221   virtual int getHitTypeSize( void ) const;
00222   virtual void allocateHitList( unsigned long size );
00223   virtual void loadHitList( unsigned long size );
00224   virtual void saveHitList( void );
00225   //  virtual void savePointerArray( void );
00226 
00227   virtual int  getMaxNumHits() const; 
00228 
00229   virtual void setMaxNumHits( int mnh ); 
00230 
00231   // Function Name: printHashStats
00232   // Prints some stats about the hash table
00233   virtual void printHashStats( void ); 
00234 
00235   // Need to redefine this so that expected number of words bit
00236   // in SSAHAMain.cpp works properly.
00237   virtual unsigned long getTotalNumWords( void ) const; 
00238 
00239   // Point pHash_ at the forward (hashFwd_) or reverse (hashRev_) component.
00240   void setForward( void ) { pHash_ = &hashFwd_; }
00241   void setReverse( void ) { pHash_ = &hashRev_; }
00242   // Choose which member function matchSequence() dispatches to.
00243   void setQueryProtein( void ) 
00244   { 
00245     pMatchSequence_ = &HashTableTranslated::matchSequenceProtein;
00246   }
00247   void setQueryTranslatedDNA( void ) 
00248   {
00249     pMatchSequence_ = &HashTableTranslated::matchSequenceTranslatedDNA;
00250   }
00251 
00252   // Accessor Functions
00253   // (NB all accessor functions should be 'const')
00254 
00255   // Function Name: isForward
00256   // Arguments: none
00257   // Compares pHash_ against the address of hashFwd_.
00258   // Returns: bool, true when pHash_ currently points at hashFwd_
00259   bool isForward( void ) const { return (pHash_==&hashFwd_); }
00260 
00261   
00262   // PROTECTED MEMBER FUNCTIONS 
00263   // (visible to this class and derived classes only)
00264   protected:
00265 
00266   // PRIVATE MEMBER FUNCTIONS
00267   // (visible to instances of this class only)
00268   // Copying is disabled: both operations are declared private and left unimplemented.
00269   private:
00270   HashTableTranslated( const HashTableTranslated&);          // NOT IMPLEMENTED
00271   HashTableTranslated& operator=(const HashTableTranslated&);// NOT IMPLEMENTED
00272 
00273   // PROTECTED MEMBER DATA (declared under 'protected:', so derived classes can access it)
00274   protected:
00275   int translatedWordLength_;
00276 
00277   HashTableComponent hashFwd_;
00278   HashTableComponent hashRev_;
00279   HashTableComponent* pHash_; // selected by setForward() / setReverse()
00280 
00281   CodonList codons_;
00282   SequenceEncoderCodon codonEncoder_;
00283   // NOTE(review): no initial value for pMatchSequence_ is visible in this header - confirm the constructor selects a mode before matchSequence() is called.
00284   MatchSequencePointer pMatchSequence_;
00285 
00286 
00287 }; // ~HashTableTranslated
00288 
00289 // ### Function Declarations ###
00290 
00291 // Name:
00292 // Arguments:
00293 // TYPE  NAME  IN/OUT COMMENT
00294 // Returns: TYPE COMMENT
00295 
00296 // End of include guard:
00297 #endif
00298 
00299 // End of file HashTableTranslated.h

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2