#include <HashTable.h>
Inheritance diagram for HashTable:


Definition at line 41 of file HashTable.h.
| typedef void(HashTable::* ) HashTable::MatchSequencePointer(WordSequence &, HitList &) |
Definition at line 46 of file HashTable.h.
| HashTable::HashTable | ( | ostream & | monitoringStream = cerr, |
|
| string | name = "", |
|||
| Allocator< PositionInDatabase > & | hitListAllocator = defaultHitListAllocator, |
|||
| Allocator< PositionInHitList > & | arrayAllocator = defaultArrayAllocator | |||
| ) | [inline] |
Definition at line 52 of file HashTable.h.
References gStandard, HashTableGeneric::hitListFormat_, and HashTableGeneric::monitoringStream_.
00057 : 00058 HashTableView<PositionInDatabase,HashTable> 00059 (monitoringStream, name, hitListAllocator, arrayAllocator), 00060 pMatchSequence_(&HashTable::matchSequenceStandard), 00061 numRepeats_(0) 00062 { 00063 hitListFormat_ = gStandard; 00064 monitoringStream_ << "constructing HashTable\n"; 00065 }
| SequenceNumber HashTable::getSequence | ( | const_iterator | i | ) | [inline, static] |
| SequenceOffset HashTable::getOffset | ( | const_iterator | i | ) | [inline, static] |
Definition at line 144 of file HashTable.h.
References PositionInDatabase::offset.
00145 { 00146 return i->offset; 00147 } // ~SequenceOffset HashTableView<PositionInDatabase>::getOffset
| void HashTable::setNumRepeats | ( | int | numRepeats | ) | [virtual] |
Implements HashTableGeneric.
Definition at line 58 of file HashTable.cpp.
References matchSequenceRepeated(), matchSequenceStandard(), numRepeats_, pMatchSequence_, and HashTableGeneric::stepLength_.
Referenced by main().
00059 { 00060 if ( (numRepeats<0) || (numRepeats>stepLength_) ) 00061 throw SSAHAException("Invalid value for numRepeats!!"); 00062 numRepeats_=numRepeats; 00063 pMatchSequence_ = ( numRepeats==0 ) 00064 ? &HashTable::matchSequenceStandard 00065 : &HashTable::matchSequenceRepeated; 00066 }
Here is the call graph for this function:

Here is the caller graph for this function:

| virtual void HashTable::matchSequence | ( | WordSequence & | seq, | |
| HitList & | hitListFwd | |||
| ) | [inline, virtual] |
Implements HashTableGeneric.
Definition at line 93 of file HashTable.h.
References pMatchSequence_.
Referenced by main().
00094 { (this->*pMatchSequence_)(seq, hitListFwd); }
Here is the caller graph for this function:

| void HashTable::hashWords | ( | SequenceAdapter & | thisSeq, | |
| SequenceNumber | seqNum | |||
| ) | [virtual] |
Implements HashTableView< PositionInDatabase, HashTable >.
Definition at line 279 of file HashTable.cpp.
References gCursedWord, PositionInDatabase::offset, HashTableView< PositionInDatabase, HashTable >::pHitListForAllWords_, HashTableGeneric::pHitsFoundSoFar_, HashTableGeneric::pWordPositionInHitList_, PositionInDatabase::sequence, SequenceAdapter::size(), and HashTableGeneric::stepLength_.
00280 { 00281 00282 register Word thisWord; 00283 register PositionInHitList currentPos; 00284 // NB We stop at the last but one element of the 00285 // sequence (as the last isn't a full word) 00286 00287 for ( int j(0) ; j < thisSeq.size() ; ++ j ) 00288 { 00289 thisWord = thisSeq[j]; 00290 // only hash words that have not been flagged 00291 if ((thisWord&gCursedWord)!=(Word)0) continue; 00292 currentPos 00293 = pHitsFoundSoFar_[thisWord] 00294 +( ( thisWord == 0 ) 00295 ? 0 : pWordPositionInHitList_[thisWord - 1]) ; 00296 00297 if ( currentPos != pWordPositionInHitList_[thisWord] ) 00298 { // then place position in the hit list 00299 pHitListForAllWords_[currentPos].sequence 00300 = seqNum; 00301 pHitListForAllWords_[currentPos].offset 00302 = j * stepLength_; 00303 // DEBUG_L2("list "<< printWord(thisWord,wordLength_) 00304 // << " "<< seqNum << " " << j*stepLength_ ); 00305 pHitsFoundSoFar_[thisWord]++; 00306 } // ~if 00307 00308 } // ~ for thisWord 00309 00310 00311 } // ~HashTable::hashWords
Here is the call graph for this function:

| void HashTable::countWords | ( | SequenceAdapter & | thisSeq | ) | [virtual] |
Implements HashTableView< PositionInDatabase, HashTable >.
Reimplemented in HashTableTest.
Definition at line 265 of file HashTable.cpp.
References gCursedWord, HashTableGeneric::pWordPositionInHitList_, and SequenceAdapter::size().
00266 { 00267 00268 for ( int j(0) ; j < thisSeq.size() ; ++ j ) 00269 { 00270 // only count words that have not been flagged 00271 pWordPositionInHitList_[(thisSeq[j]&(~gCursedWord))] 00272 += ((thisSeq[j]&gCursedWord)==(Word)0); 00273 // pWordPositionInHitList_[thisSeq[j]]++; 00274 } 00275 00276 } // ~HashTable::countWords
Here is the call graph for this function:

| void HashTable::matchSequenceStandard | ( | WordSequence & | seq, | |
| HitList & | hitListFwd | |||
| ) |
Definition at line 81 of file HashTable.cpp.
References HashTableGeneric::bitsPerSymbol_, WordSequence::getNumBasesInLast(), HashTableView< PositionInDatabase, HashTable >::matchWord(), shiftSequence(), and HashTableGeneric::wordLength_.
Referenced by setNumRepeats().
00082 { 00083 00084 int numBasesInLast( seq.getNumBasesInLast() ); 00085 00086 for ( int i(0) ; i < wordLength_ ; ++i ) 00087 { 00088 matchWord( seq, hitListFwd, i ); 00089 shiftSequence( seq, bitsPerSymbol_, wordLength_ ); 00090 if ( i == numBasesInLast ) 00091 { 00092 seq.pop_back(); 00093 } // ~if 00094 } // ~for 00095 00096 } // ~HashTable::matchSequence
Here is the call graph for this function:

Here is the caller graph for this function:

| void HashTable::matchSequenceRepeated | ( | WordSequence & | seq, | |
| HitList & | hitListFwd | |||
| ) |
Definition at line 104 of file HashTable.cpp.
References HitListVector::addHit(), RepeatedHit::cyclePos, hits, HashTableView< PositionInDatabase, HashTable >::matchWord(), numRepeats_, WordSequenceShifted::size(), HashTableGeneric::stepLength_, and HashTableGeneric::wordLength_.
Referenced by setNumRepeats().
00106 { 00107 WordSequenceShifted seqShifted(seq, *this); 00108 // screenRepeats( seqShifted, hitListFwd, numRepeats_ ); 00109 00110 Word thisWord; 00111 00112 int m; 00113 00114 // cout << "my size: " << size() << endl; 00115 00116 // i cycles through each full word in the query sequence 00117 for ( int i(0) ; i < seqShifted.size() ; ++i ) 00118 { 00119 00120 00121 // cout << "doing i:" << i << endl; 00122 thisWord = seqShifted[i]; 00123 m = 0; 00124 00125 // look through the next numRepeats_ words for duplicates 00126 for ( int j(i+1) ; 00127 ( ( j < seqShifted.size() ) && ( j <= i + numRepeats_ ) ); 00128 ++j ) 00129 { 00130 if ( thisWord == seqShifted[j] ) 00131 { 00132 m = j - i; 00133 // cout << "Tandem repeat: " << i << "-" << j << "\n"; // %%%% 00134 break; 00135 } // ~if 00136 } // ~for j 00137 if ( m == 0 ) 00138 { 00139 // cout << "doing bog standard matching for:" << i << endl; 00140 matchWord( thisWord, hitListFwd, i ); 00141 } // ~if 00142 else 00143 { 00144 // ... then we have found a tandem repeat of length m 00145 int r(1); 00146 00147 // scan forward until we reach either the end of the 00148 // repeated region or the end of the sequence 00149 while ( ( seqShifted[i+(r*m)]==thisWord ) 00150 && ( i+(r*m) < seqShifted.size() ) ) ++r; 00151 00152 // cout << "Num repeats: " << r << endl; 00153 00154 // any hits in a run of matching hits that exceed lastRun are 00155 // ignored, because in that case the size of the repeated 00156 // region in the subject sequence exceeds the size of the 00157 // repeated region in the query sequence 00158 int lastRun((r-1)*m); 00159 00160 // cout << "size of repeated run: " << lastRun << endl; 00161 00162 while ( seqShifted[i+lastRun] == seqShifted[i+lastRun-m] ) lastRun++; 00163 lastRun--; 00164 00165 // cout << "adjusted size of repeated run: " << lastRun << endl; 00166 00167 HitListRepeated hits; 00168 00169 // as we proceed base by base along a region of tandem repeats 00170 // of motif length m, we encounter m distinct hash words, after 00171 // that, they repeat. Now get the hits for each of these words: 00172 // passing in j tags each hit with its position in the repeat cycle 00173 for ( int j(0) ; j < m ; ++j ) 00174 { 00175 matchWord( seqShifted[i+j], hits, j); 00176 } // ~for j 00177 00178 // sort hits in order of RepeatedHit::subjectPos 00179 sort( hits.begin(), hits.end() ); 00180 00181 // lastHit = previous hit in list, initialized to all zeroes 00182 RepeatedHit lastHit; 00183 // firstHit = first hit of a matching run, initialized to all zeroes 00184 RepeatedHit firstHit; 00185 00186 // thisRun = size of current run of matching hits 00187 int thisRun(0); 00188 00189 00190 for ( HitListRepeated::iterator thisHit( hits.begin() ); 00191 thisHit != hits.end() ; ++thisHit ) 00192 { 00193 // cout << ": " << (*thisHit).subjectPos.sequence << " " 00194 // << (*thisHit).subjectPos.offset << " " 00195 // << (*thisHit).cyclePos ; 00196 if ( ( (*thisHit).subjectPos.sequence 00197 == lastHit.subjectPos.sequence ) 00198 && ( (*thisHit).subjectPos.offset 00199 == lastHit.subjectPos.offset + stepLength_ ) 00200 && ( (*thisHit).cyclePos 00201 == (( lastHit.cyclePos + stepLength_ ) % m) ) ) 00202 { 00203 if ( thisRun == 0 ) 00204 { 00205 // cout << " -s- "; 00206 // then a run if matching hits has started 00207 firstHit = lastHit; 00208 thisRun = wordLength_; 00209 } // ~if 00210 else 00211 { 00212 // cout << " -c- "; 00213 // we continue an existing run 00214 thisRun += stepLength_; 00215 } 00216 00217 00218 } // ~if 00219 else 00220 { 00221 thisRun=0; 00222 firstHit = *thisHit; 00223 } 00224 // cout << " -n- "; 00225 if (thisRun <= lastRun ) 00226 { 00227 // only output hits if length of repeated region in subject 00228 // is less than or equal to that of query 00229 // cout << "added" << (*thisHit).subjectPos.offset 00230 // << "-" << i + firstHit.cyclePos + thisRun; 00231 hitListFwd.addHit( thisHit->subjectPos, 00232 i + firstHit.cyclePos + thisRun ); 00233 } else // cout << "ignored" << (*thisHit).subjectPos.offset 00234 // << "-" << i + firstHit.cyclePos + thisRun; 00235 00236 00237 lastHit = *thisHit; 00238 // cout << endl; 00239 } // ~for 00240 00241 // carry on at the end of the repeated region in the query seq 00242 // i += (r-1)*m+1; 00243 // Actually want 1 less cos i is incremented anyway at the 00244 // end of the for loop - TC 5.7.1 00245 // i += (r-1)*m; 00246 i += lastRun; 00247 // cout << "Carrying on at pos " << i+1 << "\n"; 00248 // break out of the `for m' loop 00249 // break; 00250 00251 00252 00253 } // ~else 00254 00255 } // ~for i 00256 00257 00258 00259 } // ~HashTable::matchSequenceRepeated
Here is the call graph for this function:

Here is the caller graph for this function:

AllocatorLocal< PositionInHitList > HashTable::defaultArrayAllocator [static, private] |
Definition at line 43 of file HashTable.h.
AllocatorLocal< PositionInDatabase > HashTable::defaultHitListAllocator [static, private] |
Definition at line 44 of file HashTable.h.
Definition at line 105 of file HashTable.h.
Referenced by matchSequenceRepeated(), and setNumRepeats().
1.5.2