00001 /* Last edited: Mar 22 11:36 2002 (ac2) */ 00002 00003 // ####################################################################### 00004 00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm 00006 // Version 3.2, released 1st March 2004 00007 // Copyright (c) Genome Research 2002 00008 00009 // SSAHA is free software; you can redistribute it and/or modify 00010 // it under the terms of version 2 of the GNU General Public Licence 00011 // as published by the Free Software Foundation. 00012 00013 // This program is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 // GNU General Public Licence for more details. 00017 00018 // You should have received a copy of the GNU General Public Licence 00019 // along with this program; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt 00022 00023 // ####################################################################### 00024 00025 // Module Name : HashTable 00026 // File Name : HashTable.h 00027 // Language : C++ 00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk) 00029 00030 // Include guard: 00031 #ifndef INCLUDED_HashTable 00032 #define INCLUDED_HashTable 00033 00034 // Description: 00035 00036 // Includes: 00037 #include "HashTableGeneric.h" 00038 00039 class WordSequenceShifted; 00040 00041 class HashTable : public HashTableView<PositionInDatabase,HashTable> 00042 { 00043 static AllocatorLocal<PositionInHitList> defaultArrayAllocator; 00044 static AllocatorLocal<PositionInDatabase> defaultHitListAllocator; 00045 public: 00046 typedef void (HashTable::* MatchSequencePointer )( WordSequence&, HitList& ); 00047 00048 // HashTable( ostream& monitoringStream=cerr) : 00049 // HashTableView<PositionInDatabase,HashTable>(monitoringStream), 00050 // pMatchSequence_(&HashTable::matchSequenceStandard), 00051 // numRepeats_(0){} 00052 HashTable( ostream& monitoringStream=cerr, 00053 string name="", 00054 Allocator<PositionInDatabase>& hitListAllocator 00055 = defaultHitListAllocator, 00056 Allocator<PositionInHitList>& arrayAllocator 00057 = defaultArrayAllocator ): 00058 HashTableView<PositionInDatabase,HashTable> 00059 (monitoringStream, name, hitListAllocator, arrayAllocator), 00060 pMatchSequence_(&HashTable::matchSequenceStandard), 00061 numRepeats_(0) 00062 { 00063 hitListFormat_ = gStandard; 00064 monitoringStream_ << "constructing HashTable\n"; 00065 } 00066 00067 inline static SequenceNumber getSequence( const_iterator i ); 00068 inline static SequenceOffset getOffset( const_iterator i ); 00069 00070 00071 // Function Name: matchWord 00072 // Arguments: Word (in), HitList& (out) 00073 // Populates hitsFound with the positions in the subject sequence database 00074 // of all occurrences of the Word queryWord. 00075 // void matchWord 00076 // ( Word queryWord, HitList& hitsFound, int baseOffset=0 ) const; 00077 00078 // Function Name: matchWord 00079 // Arguments: WordSequence& (in), HitList& (out) 00080 // Populates hitsFound with the positions in the database of all occurrences 00081 // of the Words in the WordSequence queryWords. baseOffset is the initial 00082 // shift in base pairs to be subtracted from all hit positions (TBD explain 00083 // this better!) 00084 // void matchWord 00085 // ( const WordSequence& queryWords, 00086 // HitList& hitsFound, 00087 // int baseOffset = 0 ) const; 00088 00089 00090 virtual void setNumRepeats( int numRepeats ); 00091 00092 virtual void matchSequence 00093 ( WordSequence& seq, HitList& hitListFwd ) 00094 { (this->*pMatchSequence_)(seq, hitListFwd); } 00095 00096 // void screenRepeats 00097 // ( WordSequenceShifted& seq, HitList& hitsOut, int numRepeats ); 00098 00099 virtual void hashWords 00100 ( SequenceAdapter& thisSeq, SequenceNumber seqNum ); 00101 virtual void countWords( SequenceAdapter& thisSeq ); 00102 00103 // protected: 00104 MatchSequencePointer pMatchSequence_; 00105 int numRepeats_; 00106 // Function Name: matchSequence 00107 // Arguments: WordSequence& (in), HitList& (out), HitList& (out) 00108 // Returns: void 00109 // This obtains the full list of hits for a sequence in both forward 00110 // and reverse directions. Proceeds as follows: 00111 // 1. The reverse complement of the sequence is formed. 00112 // 2. Any hits found in the forward or reverse direction are added to the 00113 // appropriate list. 00114 // 3. The sequence and reverse complement are left-shifted by 1 base 00115 // Steps 2 and 3 are repeated wordLength_ times. 00116 // NB This function will modify seq. If you want to keep it, make a copy 00117 // before calling this function. 00118 void matchSequenceStandard 00119 ( WordSequence& seq, HitList& hitListFwd ); 00120 00121 // Function Name: matchSequence 00122 // Arguments: WordSequence& (in), HitList& (out), HitList& (out), int (in) 00123 // Returns: void 00124 // This obtains the full list of hits for a sequence in both forward 00125 // and reverse directions and masks out tandem repeats. 00126 void matchSequenceRepeated 00127 ( WordSequence& seq, 00128 HitList& hitListFwd ); 00129 // int numRepeats ); 00130 00131 00132 00133 }; // ~class HashTable 00134 00135 //SequenceNumber HashTableView<PositionInDatabase>::getSequence 00136 SequenceNumber HashTable::getSequence 00137 ( const_iterator i ) 00138 { 00139 return i->sequence; 00140 } // ~SequenceNumber HashTable 00141 00142 //SequenceOffset HashTableView<PositionInDatabase>::getOffset 00143 SequenceOffset HashTable::getOffset 00144 ( const_iterator i ) 00145 { 00146 return i->offset; 00147 } // ~SequenceOffset HashTableView<PositionInDatabase>::getOffset 00148 00149 00150 00151 00152 // Struct Name: RepeatedHit 00153 // Description: This contains the information that needs to be stored for 00154 // each hit in a region of tandem repeats 00155 struct RepeatedHit 00156 { 00157 00158 RepeatedHit( const PositionInDatabase& subjectPos_, 00159 const SequenceOffset& cyclePos_ ) : 00160 subjectPos( subjectPos_ ), cyclePos( cyclePos_ ) {} 00161 00162 RepeatedHit( void ) : subjectPos(0,0), cyclePos(0) 00163 { 00164 // subjectPos.sequence = 0; 00165 // subjectPos.offset = 0; 00166 } 00167 00168 bool operator<( const RepeatedHit& rhs) const 00169 { 00170 return ( subjectPos < rhs.subjectPos ); 00171 } // ~operator< 00172 00173 // subjectPos: position of the hit in the subject database 00174 PositionInDatabase subjectPos; 00175 // cyclePos: hash words obtained from a tandem repeat region in the 00176 // query sequence will repeat every m words, where m is the length of 00177 // the repeating motif. cyclePos denotes the position of the current 00178 // word in this cycle, and takes a value from 0 to m-1. 00179 SequenceOffset cyclePos; 00180 }; 00181 00182 // Class Name: HitListRepeated 00183 // Description: store for a list of RepeatedHits. Made a subclass of HitList 00184 // so that matchWord can put hits into it. 00185 class HitListRepeated: public vector<RepeatedHit> 00186 { 00187 public: 00188 virtual ~HitListRepeated() {} 00189 void addHit( const PositionInDatabase& hitPos, 00190 const SequenceOffset& queryPos ) 00191 { 00192 push_back( RepeatedHit( hitPos, queryPos ) ); 00193 } 00194 }; 00195 00196 00197 class HashTableFred : public HashTable 00198 { 00199 public: 00200 HashTableFred( ostream& monitoringStream=cerr, 00201 string name="" ) : 00202 HashTable( monitoringStream, name ) 00203 { 00204 monitoringStream_ << "making HashTableFred" << endl; 00205 bitsPerSymbol_ = gResidueBits; 00206 } 00207 00208 }; 00209 00210 00211 00212 00213 // ### Function Declarations ### 00214 00215 // makeWord moved to Global/GlobalDefintions - TC 8.3.1 00216 00217 // End of include guard: 00218 #endif 00219 00220 // End of file HashTable.h
1.5.2