#include <HashTablePacked.h>
Inheritance diagram for HashTablePacked:


Definition at line 213 of file HashTablePacked.h.
| typedef void (HashTablePacked::*MatchSequencePointer)(WordSequence &, HitList &) |
| typedef void (HashTablePacked::*MatchWordPointer)(Word, PackedHitStore &, int) |
Definition at line 225 of file HashTablePacked.h.
| typedef void (*GenerateSubstitutesPointer)(Word, vector< Word > &, int) |
Definition at line 228 of file HashTablePacked.h.
| HashTablePacked::HashTablePacked | ( | ostream & | monitoringStream = cerr, |
|
| string | name = "", |
|||
| Allocator< PositionPacked > & | hitListAllocator = defaultHitListAllocator, |
|||
| Allocator< PositionInHitList > & | arrayAllocator = defaultArrayAllocator | |||
| ) | [inline] |
Definition at line 230 of file HashTablePacked.h.
References g32BitPacked, HashTableGeneric::hitListFormat_, HashTableGeneric::monitoringStream_, and seqStarts_.
00235 : 00236 HashTableView<PositionPacked,HashTablePacked> 00237 (monitoringStream, name, hitListAllocator, arrayAllocator), 00238 wordNum_(0), 00239 pMatchSequence_(&HashTablePacked::matchSequenceStandard), 00240 pMatchWord_(&HashTablePacked::matchWordStandard), 00241 pGenerateSubstitutes_(&generateSubstitutesDNA), 00242 numRepeats_(0), 00243 substituteThreshold_(0), 00244 sorter_(4,(sizeof(PositionPacked)*8)/4) 00245 { 00246 hitListFormat_ = g32BitPacked; 00247 seqStarts_.push_back(0); 00248 monitoringStream_ << "constructing HashTablePacked\n"; 00249 // cout << pArrayAllocator_->name_ << " " 00250 // << pHitListAllocator_->name_ << endl; // %%%% 00251 00252 }
| static SequenceNumber HashTablePacked::getSequence | ( | const_iterator | i | ) | [inline, static] |
| static SequenceOffset HashTablePacked::getOffset | ( | const_iterator | i | ) | [inline, static] |
| void HashTablePacked::hashWords | ( | SequenceAdapter & | thisSeq, | |
| SequenceNumber | seqNum | |||
| ) | [virtual] |
Implements HashTableView< PositionPacked, HashTablePacked >.
Definition at line 146 of file HashTablePacked.cpp.
References gCursedWord, HashTableView< PositionPacked, HashTablePacked >::pHitListForAllWords_, HashTableGeneric::pHitsFoundSoFar_, HashTableGeneric::pWordPositionInHitList_, seqStarts_, SequenceAdapter::size(), and wordNum_.
Referenced by HashTableTranslated::hashAllWords(), and HashTableTranslated::hashWords().
00147 { 00148 00149 register Word thisWord; 00150 register PositionInHitList currentPos; 00151 // NB We stop at the last but one element of the 00152 // sequence (as the last isn't a full word) 00153 00154 for ( int j(0) ; j < thisSeq.size() ; ++ j ) 00155 { 00156 thisWord = thisSeq[j]; 00157 00158 // only hash words that have not been flagged 00159 if ((thisWord&gCursedWord)==(Word)0) 00160 { 00161 00162 currentPos 00163 = pHitsFoundSoFar_[thisWord] 00164 +( ( thisWord == 0 ) 00165 ? 0 : pWordPositionInHitList_[thisWord - 1]) ; 00166 00167 if ( currentPos != pWordPositionInHitList_[thisWord] ) 00168 { // then place position in the hit list 00169 pHitListForAllWords_[currentPos] = wordNum_; 00170 pHitsFoundSoFar_[thisWord]++; 00171 // next line moved: wordNum still needs incrementing even if 00172 // word is flagged 00173 // wordNum_++; 00174 } // ~if 00175 else assert(1==0); 00176 } // ~if 00177 00178 wordNum_++; 00179 00180 } // ~ for thisWord 00181 00182 seqStarts_.push_back(wordNum_); 00183 00184 } // ~HashTable::hashWords
Here is the call graph for this function:

Here is the caller graph for this function:

| void HashTablePacked::countWords | ( | SequenceAdapter & | thisSeq | ) | [virtual] |
Implements HashTableView< PositionPacked, HashTablePacked >.
Definition at line 132 of file HashTablePacked.cpp.
References gCursedWord, HashTableGeneric::pWordPositionInHitList_, and SequenceAdapter::size().
Referenced by HashTableTranslated::countWordsAndGetNames().
00133 { 00134 00135 for ( int j(0) ; j < thisSeq.size() ; ++ j ) 00136 { 00137 // only count words that have not been flagged 00138 pWordPositionInHitList_[(thisSeq[j]&(~gCursedWord))] 00139 += ((thisSeq[j]&gCursedWord)==(Word)0); 00140 // pWordPositionInHitList_[thisSeq[j]]++; 00141 } 00142 // cout << endl; 00143 } // ~HashTable::countWords
Here is the call graph for this function:

Here is the caller graph for this function:

| virtual void HashTablePacked::matchSequence | ( | WordSequence & | seq, | |
| HitList & | hitListFwd | |||
| ) | [inline, virtual] |
Implements HashTableGeneric.
Reimplemented in HashTablePackedProtein.
Definition at line 267 of file HashTablePacked.h.
References pMatchSequence_.
Referenced by main(), HashTableTranslated::matchSequenceProtein(), HashTablePackedProtein::matchSequenceProtein(), HashTableTranslated::matchSequenceTranslatedDNA(), and HashTablePackedProtein::matchSequenceTranslatedDNA().
00268 { (this->*pMatchSequence_)(seq,hitListFwd); }
Here is the caller graph for this function:

| void HashTablePacked::convertHits | ( | PackedHitStore & | packedHits, | |
| HitList & | hitListFwd | |||
| ) | [virtual] |
Reimplemented in HashTableComponent, and HashTablePackedProtein.
Definition at line 225 of file HashTablePacked.cpp.
References HitListVector::addHit(), seqStarts_, sorter_, and HashTableGeneric::stepLength_.
Referenced by matchSequenceRepeated(), and matchSequenceStandard().
00226 { 00227 00228 // sort(packedHits.begin(),packedHits.end()); 00229 sorter_(packedHits); 00230 00231 00232 vector<HitPacked>::iterator i(packedHits.begin()); 00233 vector<SeqStartPos>::iterator ub, j(seqStarts_.begin()); 00234 HitListVector::size_type sortStart; 00235 // hitListFwd.reserve(hitListFwd.size()+packedHits.size()); 00236 00237 // NB seqStarts_ must not be empty else call to back() segfaults 00238 00239 while 00240 ( (j!=static_cast<vector<SeqStartPos>::iterator>(&seqStarts_.back())) 00241 && (i!=packedHits.end()) ) 00242 { 00243 ub = upper_bound(j,seqStarts_.end(),i->first); 00244 j=ub; j--; 00245 sortStart=hitListFwd.size(); 00246 while ((i!=packedHits.end())&&(i->first<*ub)) 00247 { 00248 // cout << i->first << " ... " 00249 // << ub-seqStarts_.begin() << " " 00250 // << stepLength_*(i->first - *j) 00251 // << endl; 00252 hitListFwd.addHit( ub-seqStarts_.begin(), 00253 stepLength_*(i->first - *j), 00254 i->second ); 00255 i++; 00256 00257 } // ~while 00258 sort 00259 ( static_cast<HitList::iterator>(&hitListFwd[sortStart]), 00260 hitListFwd.end(), 00261 LessThanDiff() ); 00262 // sort(hitListFwd.begin()+sortStart,hitListFwd.end(),LessThanDiff() ); 00263 j=ub; 00264 } // ~while 00265 00266 } // ~HashTablePacked::convertHits
Here is the call graph for this function:

Here is the caller graph for this function:

| void HashTablePacked::matchWordDeluxe | ( | Word | w, | |
| PackedHitStore & | hitList, | |||
| int | offset | |||
| ) | [inline] |
Definition at line 274 of file HashTablePacked.h.
References pMatchWord_.
Referenced by matchSequenceRepeated(), and matchSequenceStandard().
00275 { 00276 (this->*pMatchWord_)(w, hitList, offset); 00277 }
Here is the caller graph for this function:

| void HashTablePacked::loadHitList | ( | unsigned long | size | ) | [virtual] |
Reimplemented from HashTableView< PositionPacked, HashTablePacked >.
Definition at line 432 of file HashTablePacked.cpp.
References HashTableGeneric::getNumSequences(), loadFromFile(), HashTableView< T, SUBCLASS >::loadHitList(), HashTableGeneric::monitoringStream_, HashTableGeneric::name_, and seqStarts_.
00433 { 00434 00435 string startFileName(name_+(string)".start"); 00436 00437 ifstream startFile(startFileName.c_str()); 00438 if (startFile.fail()) 00439 { 00440 monitoringStream_ << "Could not open " << startFileName << endl; 00441 throw SSAHAException("Could not open .start file"); 00442 } // ~if 00443 00444 startFile.seekg(0,ios::end); 00445 long startFileSize = startFile.tellg(); 00446 00447 if ( getNumSequences() + 1 != startFileSize/sizeof(SeqStartPos)) 00448 { 00449 seqStarts_.resize(startFileSize/sizeof(SeqStartPos)); 00450 monitoringStream_ 00451 << "Info: expecting " << seqStarts_.size()-1 << " sequences in file" 00452 << endl; 00453 } // ~if 00454 else seqStarts_.resize(getNumSequences()+1); 00455 00456 loadFromFile( startFileName, 00457 (char*) &seqStarts_[0], 00458 startFileSize ); 00459 00460 HashTableView<PositionPacked,HashTablePacked>::loadHitList(size); 00461 00462 }
Here is the call graph for this function:

| void HashTablePacked::saveHitList | ( | void | ) | [virtual] |
Reimplemented from HashTableView< PositionPacked, HashTablePacked >.
Definition at line 466 of file HashTablePacked.cpp.
References HashTableGeneric::name_, HashTableView< T, SUBCLASS >::saveHitList(), saveToFile(), and seqStarts_.
00467 { 00468 00469 // assert(seqStarts_.size()==getNumSequences()+1); 00470 // assertion removed, not true when part of a HashTableTranslated 00471 00472 saveToFile( name_+(string)".start", 00473 (char*) &seqStarts_[0], 00474 (seqStarts_.size())*sizeof(SeqStartPos) ); 00475 00476 HashTableView<PositionPacked,HashTablePacked>::saveHitList(); 00477 } // ~HashTablePacked::saveHitList( void )
Here is the call graph for this function:

| void HashTablePacked::setNumRepeats | ( | int | numRepeats | ) | [virtual] |
Implements HashTableGeneric.
Definition at line 52 of file HashTablePacked.cpp.
References matchSequenceRepeated(), matchSequenceStandard(), numRepeats_, pMatchSequence_, and HashTableGeneric::stepLength_.
Referenced by main(), and HashTableTranslated::setNumRepeats().
00053 { 00054 if ( (numRepeats<0) || (numRepeats>stepLength_) ) 00055 throw SSAHAException("Invalid value for numRepeats!!"); 00056 numRepeats_=numRepeats; 00057 pMatchSequence_ = ( numRepeats==0 ) 00058 ? &HashTablePacked::matchSequenceStandard 00059 : &HashTablePacked::matchSequenceRepeated; 00060 } // ~HashTablePacked::setNumRepeats( int numRepeats )
Here is the call graph for this function:

Here is the caller graph for this function:

| void HashTablePacked::setSubstituteThreshold | ( | int | ns | ) | [virtual] |
Reimplemented from HashTableGeneric.
Definition at line 62 of file HashTablePacked.cpp.
References matchWordStandard(), matchWordSubstitute(), HashTableGeneric::maxNumHits_, pMatchWord_, and substituteThreshold_.
Referenced by main(), and HashTableTranslated::setSubstituteThreshold().
00063 { 00064 if ( (numSubs<0) || (numSubs>maxNumHits_) ) 00065 throw SSAHAException("Invalid value for substituteThreshold!!"); 00066 substituteThreshold_=numSubs; 00067 pMatchWord_ = ( numSubs==0 ) 00068 ? &HashTablePacked::matchWordStandard 00069 : &HashTablePacked::matchWordSubstitute; 00070 } // ~HashTablePacked::setSubstituteThreshold( int numSubs )
Here is the call graph for this function:

Here is the caller graph for this function:

| void HashTablePacked::matchSequenceStandard | ( | WordSequence & | seq, | |
| HitList & | hitListFwd | |||
| ) |
Definition at line 187 of file HashTablePacked.cpp.
References HashTableGeneric::bitsPerSymbol_, convertHits(), WordSequence::getNumBasesInLast(), HashTableView< PositionPacked, HashTablePacked >::last(), matchWordDeluxe(), shiftSequence(), and HashTableGeneric::wordLength_.
Referenced by setNumRepeats().
00188 { 00189 00190 PackedHitStore packedHits; 00191 int numBasesInLast( seq.getNumBasesInLast() ), baseOffset; 00192 00193 for ( int i(0) ; i < wordLength_ ; ++i ) 00194 { 00195 // matchWord( seq, packedHits, i ); 00196 if (seq.size()!=0) 00197 { 00198 baseOffset=i; 00199 WordSequence::const_iterator last(&seq.back()); 00200 for ( WordSequence::const_iterator thisWord(seq.begin()); 00201 thisWord != last ; ++thisWord ) 00202 { 00203 int oldSize(packedHits.size()); // %%%%%% 00204 matchWordDeluxe( *thisWord, packedHits, baseOffset ); 00205 // cout << printResidue(*thisWord, wordLength_) << " " 00206 // << packedHits.size()-oldSize; 00207 // for (int fk(oldSize);fk!=packedHits.size();fk++) cout << " " << packedHits[fk].first; 00208 // cout << endl; 00209 baseOffset += wordLength_; 00210 } // ~for 00211 } // ~if 00212 shiftSequence( seq, bitsPerSymbol_, wordLength_ ); 00213 if ( i == numBasesInLast ) 00214 { 00215 seq.pop_back(); 00216 } // ~if 00217 } // ~for 00218 00219 convertHits(packedHits,hitListFwd); 00220 00221 } // ~HashTablePacked::matchSequenceStandard
Here is the call graph for this function:

Here is the caller graph for this function:

| void HashTablePacked::matchSequenceRepeated | ( | WordSequence & | seq, | |
| HitList & | hitListFwd | |||
| ) |
Definition at line 269 of file HashTablePacked.cpp.
References convertHits(), hits, HashTableView< PositionPacked, HashTablePacked >::matchWord(), matchWordDeluxe(), numRepeats_, seqStarts_, WordSequenceShifted::size(), HashTableGeneric::stepLength_, and HashTableGeneric::wordLength_.
Referenced by setNumRepeats().
00270 { 00271 WordSequenceShifted seqShifted(seq, *this); 00272 // screenRepeats( seqShifted, hitListFwd, numRepeats_ ); 00273 00274 PackedHitStore nonRepeatedHits; 00275 00276 Word thisWord; 00277 00278 int m; 00279 00280 // cout << "my size: " << size() << endl; 00281 00282 // i cycles through each full word in the query sequence 00283 for ( int i(0) ; i < seqShifted.size() ; ++i ) 00284 { 00285 00286 00287 // cout << "doing i:" << i << endl; 00288 thisWord = seqShifted[i]; 00289 m = 0; 00290 00291 // look through the next numRepeats_ words for duplicates 00292 for ( int j(i+1) ; 00293 ( ( j < seqShifted.size() ) && ( j <= i + numRepeats_ ) ); 00294 ++j ) 00295 { 00296 if ( thisWord == seqShifted[j] ) 00297 { 00298 m = j - i; 00299 // cout << "Tandem repeat: " << i << "-" << j << "\n"; // %%%% 00300 break; 00301 } // ~if 00302 } // ~for j 00303 if ( m == 0 ) 00304 { 00305 // cout << "doing bog standard matching for:" << i << endl; 00306 matchWordDeluxe( thisWord, nonRepeatedHits, i ); 00307 } // ~if 00308 else 00309 { 00310 // ... 
then we have found a tandem repeat of length m 00311 int r(1); 00312 00313 // scan forward until we reach either the end of the 00314 // repeated region or the end of the sequence 00315 while ( ( seqShifted[i+(r*m)]==thisWord ) 00316 && ( i+(r*m) < seqShifted.size() ) ) ++r; 00317 00318 // cout << "Num repeats: " << r << endl; 00319 00320 // any hits in a run of matching hits that exceed lastRun are 00321 // ignored, because in that case the size of the repeated 00322 // region in the subject sequence exceeds the size of the 00323 // repeated region in the query sequence 00324 int lastRun((r-1)*m); 00325 00326 // cout << "size of repeated run: " << lastRun << endl; 00327 00328 while ( seqShifted[i+lastRun] == seqShifted[i+lastRun-m] ) lastRun++; 00329 lastRun--; 00330 00331 // cout << "adjusted size of repeated run: " << lastRun << endl; 00332 00333 // HitListRepeated hits; 00334 PackedHitStore packedHits; 00335 HitListRepeated hits; 00336 00337 // as we proceed base by base along a region of tandem repeats 00338 // of motif length m, we encounter m distinct hash words, after 00339 // that, they repeat. 
Now get the hits for each of these words: 00340 // passing in j tags each hit with its position in the repeat cycle 00341 for ( int j(0) ; j < m ; ++j ) 00342 { 00343 matchWord( seqShifted[i+j], packedHits, j); 00344 } // ~for j 00345 00346 00347 sort(packedHits.begin(),packedHits.end()); 00348 00349 vector<HitPacked>::iterator pThisHit(packedHits.begin()); 00350 vector<SeqStartPos>::iterator ub, j(seqStarts_.begin()); 00351 // hitListFwd.reserve(hitListFwd.size()+packedHits.size()); 00352 00353 // NB seqStarts_ must not be empty else call to back() segfaults 00354 00355 // lastHit = previous hit in list, initialized to all zeroes 00356 HitPacked lastHit; 00357 // firstHit = first hit of a matching run, initialized to all zeroes 00358 HitPacked firstHit; 00359 00360 // thisRun = size of current run of matching hits 00361 int thisRun(0); 00362 00363 while 00364 ( ( j!=static_cast<vector<SeqStartPos>::iterator> 00365 ( &seqStarts_.back() ) ) 00366 && ( pThisHit!=packedHits.end() ) ) 00367 { 00368 ub = upper_bound(j,seqStarts_.end(),pThisHit->first); 00369 j=ub; j--; 00370 lastHit=*pThisHit; 00371 // ensures if condition always false first time through while 00372 while ((pThisHit!=packedHits.end())&&(pThisHit->first<*ub)) 00373 { 00374 if ( ( pThisHit->first 00375 == lastHit.first+1 ) 00376 && ( pThisHit->second 00377 == (( lastHit.second + stepLength_ ) % m) ) ) 00378 { 00379 if ( thisRun == 0 ) 00380 { 00381 // cout << " -s- "; 00382 // then a run of matching hits has started 00383 firstHit = lastHit; 00384 thisRun = wordLength_; 00385 } // ~if 00386 else 00387 { 00388 // cout << " -c- "; 00389 // we continue an existing run 00390 thisRun += stepLength_; 00391 } // ~else 00392 00393 00394 } // ~if 00395 else 00396 { 00397 thisRun=0; 00398 firstHit = *pThisHit; 00399 } 00400 // cout << " -n- "; 00401 if (thisRun <= lastRun ) 00402 { 00403 // only output hits if length of repeated region in subject 00404 // is less than or equal to that of query 00405 // cout << 
"added" << (*thisHit).subjectPos.offset 00406 // << "-" << i + firstHit.cyclePos + thisRun; 00407 // hitListFwd.addHit( thisHit->subjectPos, 00408 // i + firstHit.cyclePos + thisRun ); 00409 00410 nonRepeatedHits.push_back 00411 ( HitPacked( pThisHit->first, 00412 i + firstHit.second + thisRun ) ); 00413 } // ~if 00414 else 00415 { 00416 lastHit = *pThisHit; 00417 } // ~else 00418 pThisHit++; 00419 } // ~while 00420 j=ub; 00421 } // ~while 00422 i += lastRun; 00423 } // ~else 00424 } // ~for i 00425 convertHits( nonRepeatedHits, hitListFwd ); 00426 00427 } // ~HashTablePacked::matchSequenceRepeated
Here is the call graph for this function:

Here is the caller graph for this function:

| void HashTablePacked::matchWordStandard | ( | Word | w, | |
| PackedHitStore & | hitList, | |||
| int | offset | |||
| ) | [inline] |
Definition at line 294 of file HashTablePacked.h.
References HashTableView< T, SUBCLASS >::matchWord().
Referenced by setSubstituteThreshold().
00295 { 00296 HashTableView<PositionPacked,HashTablePacked>::matchWord 00297 ( w, hitList, offset ); 00298 } // ~matchWordStandard( Word w, HitListVector hitList, int offset )
Here is the call graph for this function:

Here is the caller graph for this function:

| void HashTablePacked::matchWordSubstitute | ( | Word | w, | |
| PackedHitStore & | hitList, | |||
| int | offset | |||
| ) |
Definition at line 73 of file HashTablePacked.cpp.
References gCursedWord, HashTableView< T, SUBCLASS >::matchWord(), HashTableView< PositionPacked, HashTablePacked >::size(), substituteThreshold_, and HashTableGeneric::wordLength_.
Referenced by setSubstituteThreshold().
00074 { 00075 if ((w&gCursedWord)!=(Word)0) return; 00076 00077 HashTableView<PositionPacked,HashTablePacked>::matchWord 00078 ( w, hitList, offset ); 00079 assert((w&gCursedWord)==0); 00080 if (size(w)>substituteThreshold_) return; 00081 vector<Word> neighbours; 00082 (*pGenerateSubstitutes_)( w, neighbours, wordLength_); 00083 for( vector<Word>::iterator i(neighbours.begin()); 00084 i!=neighbours.end(); i++) 00085 { 00086 HashTableView<PositionPacked,HashTablePacked>::matchWord 00087 ( *i, hitList, offset ); 00088 } // ~for i 00089 } // void HashTablePacked::matchWordSubstitute
Here is the call graph for this function:

Here is the caller graph for this function:

friend class HashTableTranslated [friend] |
Definition at line 216 of file HashTablePacked.h.
Definition at line 218 of file HashTablePacked.h.
Definition at line 219 of file HashTablePacked.h.
vector<SeqStartPos> HashTablePacked::seqStarts_ [protected] |
Definition at line 303 of file HashTablePacked.h.
Referenced by HashTablePackedProtein::convertHits(), HashTableComponent::convertHits(), convertHits(), HashTablePackedCustom::convertHits(), HashTablePacked(), hashWords(), loadHitList(), matchSequenceRepeated(), and saveHitList().
unsigned int HashTablePacked::wordNum_ [protected] |
int HashTablePacked::numRepeats_ [protected] |
Definition at line 305 of file HashTablePacked.h.
Referenced by matchSequenceRepeated(), and setNumRepeats().
int HashTablePacked::substituteThreshold_ [protected] |
Definition at line 306 of file HashTablePacked.h.
Referenced by matchWordSubstitute(), and setSubstituteThreshold().
MatchSequencePointer HashTablePacked::pMatchSequence_ [protected] |
Reimplemented in HashTablePackedProtein.
Definition at line 307 of file HashTablePacked.h.
Referenced by matchSequence(), and setNumRepeats().
MatchWordPointer HashTablePacked::pMatchWord_ [protected] |
Definition at line 308 of file HashTablePacked.h.
Referenced by matchWordDeluxe(), and setSubstituteThreshold().
Definition at line 309 of file HashTablePacked.h.
Referenced by HashTableComponent::HashTableComponent(), and HashTablePackedProtein::HashTablePackedProtein().
RadixSorter HashTablePacked::sorter_ [protected] |
Definition at line 311 of file HashTablePacked.h.
Referenced by HashTablePackedProtein::convertHits(), HashTableComponent::convertHits(), and convertHits().
1.5.2