HashTable Class Reference

#include <HashTable.h>

Inheritance diagram for HashTable:

Inheritance graph
[legend]
Collaboration diagram for HashTable:

Collaboration graph
[legend]
List of all members.

Public Types

typedef void(HashTable::* MatchSequencePointer)(WordSequence &, HitList &)

Public Member Functions

 HashTable (ostream &monitoringStream=cerr, string name="", Allocator< PositionInDatabase > &hitListAllocator=defaultHitListAllocator, Allocator< PositionInHitList > &arrayAllocator=defaultArrayAllocator)
virtual void setNumRepeats (int numRepeats)
virtual void matchSequence (WordSequence &seq, HitList &hitListFwd)
virtual void hashWords (SequenceAdapter &thisSeq, SequenceNumber seqNum)
virtual void countWords (SequenceAdapter &thisSeq)
void matchSequenceStandard (WordSequence &seq, HitList &hitListFwd)
void matchSequenceRepeated (WordSequence &seq, HitList &hitListFwd)

Static Public Member Functions

static SequenceNumber getSequence (const_iterator i)
static SequenceOffset getOffset (const_iterator i)

Public Attributes

MatchSequencePointer pMatchSequence_
int numRepeats_

Static Private Attributes

static AllocatorLocal< PositionInHitList > defaultArrayAllocator
static AllocatorLocal< PositionInDatabase > defaultHitListAllocator

Detailed Description

Definition at line 41 of file HashTable.h.


Member Typedef Documentation

typedef void(HashTable::* HashTable::MatchSequencePointer)(WordSequence &, HitList &)

Definition at line 46 of file HashTable.h.


Constructor & Destructor Documentation

HashTable::HashTable ( ostream &  monitoringStream = cerr,
string  name = "",
Allocator< PositionInDatabase > &  hitListAllocator = defaultHitListAllocator,
Allocator< PositionInHitList > &  arrayAllocator = defaultArrayAllocator 
) [inline]

Definition at line 52 of file HashTable.h.

References gStandard, HashTableGeneric::hitListFormat_, and HashTableGeneric::monitoringStream_.

00057                                       :
00058   HashTableView<PositionInDatabase,HashTable>
00059     (monitoringStream, name, hitListAllocator, arrayAllocator), 
00060   pMatchSequence_(&HashTable::matchSequenceStandard),
00061   numRepeats_(0)
00062   {
00063     hitListFormat_ = gStandard;
00064     monitoringStream_ << "constructing HashTable\n";
00065   }


Member Function Documentation

SequenceNumber HashTable::getSequence ( const_iterator  i  )  [inline, static]

Definition at line 137 of file HashTable.h.

References PositionInDatabase::sequence.

00138 {
00139   return i->sequence;
00140 } // ~SequenceNumber HashTable

SequenceOffset HashTable::getOffset ( const_iterator  i  )  [inline, static]

Definition at line 144 of file HashTable.h.

References PositionInDatabase::offset.

00145 {
00146   return i->offset;
00147 } // ~SequenceOffset HashTableView<PositionInDatabase>::getOffset

void HashTable::setNumRepeats ( int  numRepeats  )  [virtual]

Implements HashTableGeneric.

Definition at line 58 of file HashTable.cpp.

References matchSequenceRepeated(), matchSequenceStandard(), numRepeats_, pMatchSequence_, and HashTableGeneric::stepLength_.

Referenced by main().

00059 {
00060   if ( (numRepeats<0) || (numRepeats>stepLength_) )
00061     throw SSAHAException("Invalid value for numRepeats!!");
00062   numRepeats_=numRepeats;
00063   pMatchSequence_ = ( numRepeats==0 )
00064     ? &HashTable::matchSequenceStandard
00065     : &HashTable::matchSequenceRepeated;
00066 }

Here is the call graph for this function:

Here is the caller graph for this function:

virtual void HashTable::matchSequence ( WordSequence &  seq,
HitList &  hitListFwd 
) [inline, virtual]

Implements HashTableGeneric.

Definition at line 93 of file HashTable.h.

References pMatchSequence_.

Referenced by main().

00094   { (this->*pMatchSequence_)(seq, hitListFwd); }

Here is the caller graph for this function:

void HashTable::hashWords ( SequenceAdapter &  thisSeq,
SequenceNumber  seqNum 
) [virtual]

Implements HashTableView< PositionInDatabase, HashTable >.

Definition at line 279 of file HashTable.cpp.

References gCursedWord, PositionInDatabase::offset, HashTableView< PositionInDatabase, HashTable >::pHitListForAllWords_, HashTableGeneric::pHitsFoundSoFar_, HashTableGeneric::pWordPositionInHitList_, PositionInDatabase::sequence, SequenceAdapter::size(), and HashTableGeneric::stepLength_.

00280 {
00281 
00282         register Word              thisWord;
00283         register PositionInHitList currentPos;
00284       // NB We stop at the last but one element of the 
00285       // sequence (as the last isn't a full word)
00286 
00287    for ( int j(0) ; j < thisSeq.size() ; ++ j )
00288       {
00289         thisWord = thisSeq[j];
00290         // only hash words that have not been flagged
00291         if ((thisWord&gCursedWord)!=(Word)0) continue;
00292         currentPos 
00293         = pHitsFoundSoFar_[thisWord]
00294           +( ( thisWord == 0 ) 
00295              ? 0 : pWordPositionInHitList_[thisWord - 1]) ;
00296                            
00297         if ( currentPos != pWordPositionInHitList_[thisWord] )
00298         { // then place position in the hit list
00299           pHitListForAllWords_[currentPos].sequence 
00300           = seqNum; 
00301           pHitListForAllWords_[currentPos].offset   
00302           = j * stepLength_; 
00303           //      DEBUG_L2("list "<< printWord(thisWord,wordLength_) 
00304           //  << " "<< seqNum << " " << j*stepLength_ );
00305           pHitsFoundSoFar_[thisWord]++;
00306         } // ~if
00307 
00308       } // ~ for thisWord
00309 
00310 
00311 } // ~HashTable::hashWords

Here is the call graph for this function:

void HashTable::countWords ( SequenceAdapter &  thisSeq  )  [virtual]

Implements HashTableView< PositionInDatabase, HashTable >.

Reimplemented in HashTableTest.

Definition at line 265 of file HashTable.cpp.

References gCursedWord, HashTableGeneric::pWordPositionInHitList_, and SequenceAdapter::size().

00266 {
00267 
00268    for ( int j(0) ; j < thisSeq.size() ; ++ j )
00269    {
00270      // only count words that have not been flagged
00271      pWordPositionInHitList_[(thisSeq[j]&(~gCursedWord))]
00272        += ((thisSeq[j]&gCursedWord)==(Word)0);
00273      //   pWordPositionInHitList_[thisSeq[j]]++;
00274    }
00275    
00276 } // ~HashTable::countWords

Here is the call graph for this function:

void HashTable::matchSequenceStandard ( WordSequence &  seq,
HitList &  hitListFwd 
)

Definition at line 81 of file HashTable.cpp.

References HashTableGeneric::bitsPerSymbol_, WordSequence::getNumBasesInLast(), HashTableView< PositionInDatabase, HashTable >::matchWord(), shiftSequence(), and HashTableGeneric::wordLength_.

Referenced by setNumRepeats().

00082   {
00083     
00084     int numBasesInLast( seq.getNumBasesInLast() );
00085 
00086     for ( int i(0) ; i < wordLength_ ; ++i )
00087     {
00088       matchWord( seq,    hitListFwd, i );
00089       shiftSequence( seq, bitsPerSymbol_, wordLength_ );
00090       if ( i == numBasesInLast )
00091       {
00092         seq.pop_back();
00093       } // ~if
00094     } // ~for
00095 
00096   } // ~HashTable::matchSequence

Here is the call graph for this function:

Here is the caller graph for this function:

void HashTable::matchSequenceRepeated ( WordSequence &  seq,
HitList &  hitListFwd 
)

Definition at line 104 of file HashTable.cpp.

References HitListVector::addHit(), RepeatedHit::cyclePos, hits, HashTableView< PositionInDatabase, HashTable >::matchWord(), numRepeats_, WordSequenceShifted::size(), HashTableGeneric::stepLength_, and HashTableGeneric::wordLength_.

Referenced by setNumRepeats().

00106   {
00107     WordSequenceShifted seqShifted(seq, *this);
00108     //    screenRepeats( seqShifted, hitListFwd, numRepeats_ );
00109 
00110     Word                thisWord;
00111 
00112     int m;
00113 
00114     //  cout << "my size: " << size() << endl;
00115 
00116     // i cycles through each full word in the query sequence
00117     for ( int i(0) ; i < seqShifted.size() ; ++i )
00118     {
00119 
00120 
00121       //      cout << "doing  i:" << i << endl;
00122       thisWord = seqShifted[i];
00123       m = 0;
00124 
00125       // look through the next numRepeats_ words for duplicates
00126       for ( int j(i+1) ; 
00127             ( ( j < seqShifted.size() ) && ( j <= i + numRepeats_ ) ); 
00128             ++j )
00129       {
00130         if ( thisWord == seqShifted[j] )
00131         {
00132           m = j - i;
00133           //      cout << "Tandem repeat: " << i << "-" << j << "\n"; // %%%%
00134           break;
00135         } // ~if
00136       } // ~for j
00137       if ( m == 0 )
00138       {
00139         //      cout << "doing bog standard matching for:" << i << endl;
00140         matchWord( thisWord, hitListFwd, i );
00141       } // ~if
00142       else
00143       {
00144           // ... then we have found a tandem repeat of length m
00145           int r(1);
00146 
00147           // scan forward until we reach either the end of the
00148           // repeated region or the end of the sequence
00149           while (    ( seqShifted[i+(r*m)]==thisWord )
00150                   && ( i+(r*m) < seqShifted.size() ) ) ++r;
00151 
00152           //          cout << "Num repeats: " << r << endl;
00153 
00154           // any hits in a run of matching hits that exceed lastRun are
00155           // ignored, because in that case the size of the repeated
00156           // region in the subject sequence exceeds the size of the
00157           // repeated region in the query sequence
00158           int lastRun((r-1)*m);
00159 
00160           //      cout << "size of repeated run: " << lastRun << endl;
00161 
00162           while ( seqShifted[i+lastRun] == seqShifted[i+lastRun-m] ) lastRun++;
00163           lastRun--;
00164 
00165           //      cout << "adjusted size of repeated run: " << lastRun << endl;
00166 
00167           HitListRepeated hits;
00168 
00169           // as we proceed base by base along a region of tandem repeats
00170           // of motif length m, we encounter m distinct hash words, after
00171           // that, they repeat. Now get the hits for each of these words: 
00172           // passing in j tags each hit with its position in the repeat cycle
00173           for ( int j(0) ; j < m ; ++j ) 
00174           {
00175             matchWord( seqShifted[i+j], hits, j);
00176           } // ~for j
00177 
00178           // sort hits in order of RepeatedHit::subjectPos
00179           sort( hits.begin(), hits.end() );
00180 
00181           // lastHit =  previous hit in list, initialized to all zeroes
00182           RepeatedHit lastHit; 
00183           // firstHit = first hit of a matching run, initialized to all zeroes
00184           RepeatedHit firstHit;
00185 
00186           // thisRun = size of current run of matching hits
00187           int thisRun(0);
00188 
00189         
00190           for ( HitListRepeated::iterator thisHit( hits.begin() ); 
00191                 thisHit != hits.end() ; ++thisHit )
00192           {
00193             //      cout << ": " << (*thisHit).subjectPos.sequence << " " 
00194             //      << (*thisHit).subjectPos.offset << " " 
00195             //      << (*thisHit).cyclePos ; 
00196             if (    (    (*thisHit).subjectPos.sequence 
00197                       == lastHit.subjectPos.sequence )
00198                  && (    (*thisHit).subjectPos.offset
00199                       == lastHit.subjectPos.offset + stepLength_ )
00200                  && (    (*thisHit).cyclePos
00201                       == (( lastHit.cyclePos + stepLength_ ) % m)   ) )
00202             {
00203               if ( thisRun == 0 ) 
00204               { 
00205                 //      cout << " -s- ";
00206                 // then a run if matching hits has started
00207                 firstHit = lastHit; 
00208                 thisRun  = wordLength_;
00209               } // ~if
00210               else 
00211               {
00212                 //              cout << " -c- ";
00213                 // we continue an existing run             
00214                 thisRun += stepLength_;
00215               }
00216  
00217 
00218             } // ~if
00219             else 
00220             { 
00221               thisRun=0; 
00222               firstHit = *thisHit;
00223             }
00224             //      cout << " -n- ";
00225             if (thisRun <= lastRun ) 
00226             { 
00227               // only output hits if length of repeated region in subject 
00228               // is less than or equal to that of query
00229               //    cout << "added" << (*thisHit).subjectPos.offset 
00230               //   << "-" << i + firstHit.cyclePos + thisRun;
00231               hitListFwd.addHit( thisHit->subjectPos, 
00232                                    i + firstHit.cyclePos + thisRun ); 
00233             } else // cout << "ignored" << (*thisHit).subjectPos.offset 
00234                    //     << "-" << i + firstHit.cyclePos + thisRun;
00235 
00236        
00237             lastHit = *thisHit;
00238             //      cout << endl;
00239           } // ~for
00240 
00241           // carry on at the end of the repeated region in the query seq
00242           // i += (r-1)*m+1;
00243           // Actually want 1 less cos i is incremented anyway at the
00244           // end of the for loop - TC 5.7.1
00245           //          i += (r-1)*m;
00246           i += lastRun;
00247           // cout << "Carrying on at pos " << i+1 << "\n";
00248           // break out of the `for m' loop
00249           //     break;
00250        
00251 
00252 
00253       } // ~else
00254 
00255     } // ~for i
00256 
00257 
00258 
00259   } // ~HashTable::matchSequenceRepeated

Here is the call graph for this function:

Here is the caller graph for this function:


Member Data Documentation

AllocatorLocal< PositionInHitList > HashTable::defaultArrayAllocator [static, private]

Definition at line 43 of file HashTable.h.

AllocatorLocal< PositionInDatabase > HashTable::defaultHitListAllocator [static, private]

Definition at line 44 of file HashTable.h.

MatchSequencePointer HashTable::pMatchSequence_

Definition at line 104 of file HashTable.h.

Referenced by matchSequence(), and setNumRepeats().

int HashTable::numRepeats_

Definition at line 105 of file HashTable.h.

Referenced by matchSequenceRepeated(), and setNumRepeats().


The documentation for this class was generated from the following files:
  * HashTable.h
  * HashTable.cpp
Generated on Fri Dec 21 13:16:01 2007 for ssaha by  doxygen 1.5.2