HashTable/HashTable.h

Go to the documentation of this file.
00001 /*  Last edited: Mar 22 11:36 2002 (ac2) */
00002 
00003 // #######################################################################
00004 
00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00006 // Version 3.2, released 1st March 2004
00007 // Copyright (c) Genome Research 2002
00008 
00009 // SSAHA is free software; you can redistribute it and/or modify 
00010 // it under the terms of version 2 of the GNU General Public Licence
00011 // as published by the Free Software Foundation.
00012  
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public Licence for more details.
00017  
00018 // You should have received a copy of the GNU General Public Licence
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00022 
00023 // #######################################################################
00024 
00025 // Module Name  : HashTable
00026 // File Name    : HashTable.h
00027 // Language     : C++
00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00029 
00030 // Include guard:
00031 #ifndef INCLUDED_HashTable
00032 #define INCLUDED_HashTable
00033 
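00034 // Description: declares class HashTable (a HashTableView over
      // PositionInDatabase), the RepeatedHit / HitListRepeated helpers used for
      // hits in tandem repeat regions, and the HashTableFred subclass.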
00035 
00036 // Includes:
00037 #include "HashTableGeneric.h"
00038 
00039 class WordSequenceShifted; // forward declaration; in this header the name appears only in the commented-out screenRepeats prototype
00040 
00041 class HashTable : public HashTableView<PositionInDatabase,HashTable>
00042 {
      // HashTable derives from HashTableView instantiated with
      // PositionInDatabase and with HashTable itself as the second template
      // argument. Sequence matching is routed through the pointer-to-member
      // pMatchSequence_, which the constructor points at matchSequenceStandard.
      //
      // The two static allocators below serve as the constructor's default
      // arguments. No access specifier precedes them, so they are private.
00043   static AllocatorLocal<PositionInHitList> defaultArrayAllocator;
00044   static AllocatorLocal<PositionInDatabase> defaultHitListAllocator;
00045  public:
      // Type of a HashTable member function that can act as the matching
      // routine: takes (WordSequence&, HitList&) and returns void.
00046   typedef void (HashTable::* MatchSequencePointer )( WordSequence&, HitList& );
00047   
00048   //  HashTable( ostream& monitoringStream=cerr) :
00049   //    HashTableView<PositionInDatabase,HashTable>(monitoringStream), 
00050   //   pMatchSequence_(&HashTable::matchSequenceStandard),
00051   //   numRepeats_(0){}
      // Constructor. All four arguments have defaults and are forwarded
      // unchanged to HashTableView. It then selects matchSequenceStandard as
      // the active matcher, sets numRepeats_ to 0, sets hitListFormat_ to
      // gStandard and writes "constructing HashTable\n" to monitoringStream_.
      // NOTE(review): hitListFormat_ and monitoringStream_ are not declared in
      // this class - presumably inherited from HashTableView; confirm.
00052   HashTable( ostream& monitoringStream=cerr,
00053              string name="",
00054              Allocator<PositionInDatabase>& hitListAllocator 
00055              = defaultHitListAllocator,
00056              Allocator<PositionInHitList>& arrayAllocator 
00057              = defaultArrayAllocator ):
00058   HashTableView<PositionInDatabase,HashTable>
00059     (monitoringStream, name, hitListAllocator, arrayAllocator), 
00060   pMatchSequence_(&HashTable::matchSequenceStandard),
00061   numRepeats_(0)
00062   {
00063     hitListFormat_ = gStandard;
00064     monitoringStream_ << "constructing HashTable\n";
00065   }
00066 
      // Static accessors for the sequence and offset members of the element
      // that a const_iterator refers to. Both are defined after the class,
      // further down this header.
00067   inline static SequenceNumber getSequence( const_iterator i );
00068   inline static SequenceOffset getOffset( const_iterator i );
00069 
00070 
      // NB the two matchWord prototypes documented below are commented out:
      // this class does not declare matchWord itself.
00071   // Function Name: matchWord
00072   // Arguments: Word (in), HitList& (out)
00073   // Populates hitsFound with the positions in the subject sequence database 
00074   // of all occurrences of the Word queryWord.
00075   //  void matchWord
00076   //  ( Word queryWord, HitList& hitsFound, int baseOffset=0 ) const;
00077 
00078   // Function Name: matchWord
00079   // Arguments: WordSequence& (in), HitList& (out)
00080   // Populates hitsFound with the positions in the database of all occurrences
00081   // of the Words in the WordSequence queryWords. baseOffset is the initial 
00082   // shift in base pairs to be subtracted from all hit positions (TBD explain
00083   // this better!)
00084   //  void matchWord
00085   // ( const WordSequence& queryWords, 
00086   //   HitList& hitsFound, 
00087   //   int baseOffset = 0 ) const;
00088 
00089 
      // Declared only; the body is not in this header.
      // NOTE(review): presumably records numRepeats_ and may switch
      // pMatchSequence_ between matchSequenceStandard and
      // matchSequenceRepeated - confirm against the implementation.
00090   virtual void setNumRepeats( int numRepeats );
00091 
      // Calls whichever member function pMatchSequence_ currently points to,
      // passing seq and hitListFwd straight through.
00092   virtual void matchSequence
00093   ( WordSequence& seq, HitList& hitListFwd )
00094   { (this->*pMatchSequence_)(seq, hitListFwd); }
00095 
00096   //  void screenRepeats
00097   //   ( WordSequenceShifted& seq, HitList& hitsOut, int numRepeats );
00098 
      // Declared only; the bodies of hashWords and countWords are not in this
      // header.
00099   virtual void hashWords
00100   ( SequenceAdapter& thisSeq, SequenceNumber seqNum );
00101   virtual void countWords( SequenceAdapter& thisSeq );
00102 
      // The access specifier on the next line is commented out, so everything
      // that follows remains public.
00103   // protected:
      // pMatchSequence_: the matching routine invoked by matchSequence.
      // numRepeats_: initialised to 0 by the constructor.
00104   MatchSequencePointer pMatchSequence_;
00105   int numRepeats_;
00106   // Function Name: matchSequenceStandard
00107   // Arguments: WordSequence& (in, modified - see NB below), HitList& (out)
00108   // Returns: void
00109   // This obtains the full list of hits for a sequence in both forward
00110   // and reverse directions. Proceeds as follows:
00111   // 1. The reverse complement of the sequence is formed.
00112   // 2. Any hits found in the forward or reverse direction are added to the
00113   // appropriate list.
00114   // 3. The sequence and reverse complement are left-shifted by 1 base
00115   // Steps 2 and 3 are repeated wordLength_ times.
00116   // NB This function will modify seq. If you want to keep it, make a copy
00117   // before calling this function.
      // NOTE(review): the description above speaks of forward and reverse hit
      // lists, but the signature takes a single HitList (hitListFwd); confirm
      // against the implementation which hits end up in it.
00118   void matchSequenceStandard
00119   ( WordSequence& seq, HitList& hitListFwd );
00120 
00121   // Function Name: matchSequenceRepeated
00122   // Arguments: WordSequence& (in), HitList& (out)
00123   // Returns: void
00124   // This obtains the full list of hits for a sequence in both forward
00125   // and reverse directions and masks out tandem repeats.
      // NOTE(review): the int numRepeats argument is commented out of the
      // signature below; presumably the numRepeats_ member is used instead -
      // confirm against the implementation.
00126   void matchSequenceRepeated
00127   ( WordSequence& seq, 
00128     HitList& hitListFwd ); 
00129   //    int numRepeats );
00130 
00131 
00132 
00133 }; // ~class HashTable
00134 
00135 // HashTable::getSequence - yields the sequence member of the element at hit.
00136 SequenceNumber HashTable::getSequence( const_iterator hit )
00137 {
00138   const SequenceNumber seqNum = hit->sequence;
00139   return seqNum;
00140 } // ~HashTable::getSequence
00141   
00142 // HashTable::getOffset - yields the offset member of the element at hit.
00143 SequenceOffset HashTable::getOffset( const_iterator hit )
00144 {
00145   const SequenceOffset seqOffset = hit->offset;
00146   return seqOffset;
00147 } // ~HashTable::getOffset
00148 
00149 
00150 
00151 
00152 // Struct Name: RepeatedHit
00153 // Description: one hit found inside a tandem repeat region - where it lies
00154 // in the subject database plus its place in the repeat cycle.
00155 struct RepeatedHit
00156 {
00157   // subjectPos: where the hit lies in the subject database.
00158   PositionInDatabase subjectPos;
00159   // cyclePos: words hashed from a tandem repeat in the query recur every
00160   // m words (m = length of the repeated motif); cyclePos is the index of
00161   // this word within that cycle, in the range 0 to m-1.
00162   SequenceOffset cyclePos;
00163 
00164   // Default constructor: subjectPos is built from (0,0), cyclePos from 0.
00165   RepeatedHit() : subjectPos( 0, 0 ), cyclePos( 0 ) {}
00166 
00167   // Builds a hit from a database position and a cycle position.
00168   RepeatedHit( const PositionInDatabase& hitPos,
00169                const SequenceOffset& posInCycle )
00170     : subjectPos( hitPos ), cyclePos( posInCycle )
00171   {}
00172 
00173   // Ordering: compares subjectPos only; cyclePos plays no part.
00174   bool operator<( const RepeatedHit& other ) const
00175   {
00176     const bool precedes = subjectPos < other.subjectPos;
00177     return precedes;
00178   } // ~operator<
00179 
00180 };
00181 
00182 // Class Name:  HitListRepeated
00183 // Description: a vector of RepeatedHits with an addHit convenience method.
00184 // The virtual destructor is declared here, not inherited from vector.
00185 class HitListRepeated : public vector<RepeatedHit>
00186 {
00187  public:
00188   virtual ~HitListRepeated() {}
00189   // Appends RepeatedHit( hitPos, queryPos ) to the end of the list.
00190   void addHit( const PositionInDatabase& hitPos, const SequenceOffset& queryPos )
00191   {
00192     this->push_back( RepeatedHit( hitPos, queryPos ) );
00193   }
00194 };
00195 
00196 
00197 // HashTable variant whose constructor sets bitsPerSymbol_ to gResidueBits.
00198 class HashTableFred : public HashTable
00199 {
00200  public:
00201   // Forwards both arguments to HashTable, then logs and sets bitsPerSymbol_.
00202   HashTableFred( ostream& monitoringStream = cerr, string name = "" )
00203     : HashTable( monitoringStream, name )
00204   {
00205     monitoringStream_ << "making HashTableFred" << endl;
00206     bitsPerSymbol_ = gResidueBits;
00207   }
00208 };
00209 
00210 
00211 
00212 
00213 // ### Function Declarations ###
00214 
00215 // makeWord moved to Global/GlobalDefinitions - TC 8.3.1
00216 
00217 // End of include guard:
00218 #endif
00219 
00220 // End of file HashTable.h

Generated on Fri Dec 21 13:12:15 2007 for ssaha by  doxygen 1.5.2