00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "MatchStoreGapped.h"
00034 #include "SequenceReader.h"
00035 #include "HashTableGeneric.h"
00036 #include "HashTablePacked.h"
00037 #include "GlobalDefinitions.h"
00038 #include <algorithm>
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049 void MatchAlgorithm::operator()
00050 ( WordSequence& querySeq,
00051 MatchAdder& addMatch,
00052 HashTableGeneric& subjectTable,
00053 int wordLength,
00054 int stepLength )
00055 {
00056
00057 HitListVector hitList;
00058 wordLength_ = ((wordLength==-1)?subjectTable.getWordLength():wordLength);
00059 stepLength_ = ((stepLength==-1)?wordLength_:stepLength);
00060 assert(hitList.size()==0);
00061
00062 if (dynamic_cast<HashTablePacked*>(&subjectTable)!=NULL)
00063 {
00064 sortNeeded_ = false;
00065 }
00066
00067 generateHits( querySeq, hitList, subjectTable );
00068
00069 generateMatches( hitList, addMatch );
00070 }
00071
00072 void MatchAlgorithm::generateHits
00073 ( WordSequence& querySeq, HitListVector& hitList, HashTableGeneric& subjectTable )
00074 {
00075 subjectTable.setNumRepeats( numRepeats_ );
00076
00077
00078 subjectTable.matchSequence( querySeq, hitList );
00079 }
00080
00081
00082
00083 void MatchAlgorithmGapped::generateMatches
00084 ( HitListVector& hitList, MatchAdder& addMatch )
00085 {
00086
00087 if (hitList.size()==0) return;
00088
00089 if (sortNeeded_)
00090 {
00091 sort( hitList.begin(), hitList.end(), LessThanSubject() );
00092 }
00093
00094 minHitsForMatch_ = max(2,minToProcess_ / stepLength_);
00095 maxQueryDiff_ = maxGap_ + stepLength_;
00096
00097 bool inRun(false);
00098
00099 HitListVector::iterator pFirstMatch( hitList.begin() );
00100
00101 for ( int i(0) ; i < ((int)hitList.size()) - 1 ; ++i )
00102 {
00103 if ( ( hitList[i+1].subjectNum == hitList[i].subjectNum )
00104 && ( hitList[i+1].diff - hitList[i].diff <=maxInsert_ ))
00105 {
00106 if ( inRun==false )
00107 {
00108 pFirstMatch = static_cast<HitList::iterator>(&hitList[i]);
00109 inRun=true;
00110 }
00111 }
00112 else
00113 {
00114 if (inRun==true)
00115 findMatchesInRange
00116 ( pFirstMatch,
00117 static_cast<HitList::iterator>(&hitList[i+1]),
00118 addMatch );
00119 inRun=false;
00120 }
00121
00122 }
00123
00124 if (inRun==true)
00125 findMatchesInRange( pFirstMatch, hitList.end(), addMatch );
00126
00127 return;
00128
00129 }
00130
00131
00132 void MatchAlgorithmGapped::findMatchesInRange
00133 ( HitListVector::iterator first, HitListVector::iterator last,
00134 MatchAdder& addMatch )
00135 {
00136
00137 if (first == last) return;
00138
00139
00140
00141 sort( first, last, LessThanQuery() );
00142
00143
00144
00145
00146
00147
00148
00149 int lastQueryPos, numBases, gapSize;
00150 SequenceOffset subjectStart, subjectEnd;
00151
00152 while ( last-first >= minHitsForMatch_ )
00153 {
00154
00155 lastQueryPos = first->queryPos;
00156 numBases = wordLength_;
00157 HitListVector::iterator i(first);
00158
00159 while(++i!=last)
00160 {
00161
00162 gapSize = i->queryPos - lastQueryPos;
00163
00164 if ( gapSize > maxQueryDiff_ ) break;
00165 numBases += min (wordLength_, gapSize );
00166 lastQueryPos = i->queryPos;
00167 }
00168
00169 if (numBases>=minToProcess_)
00170 {
00171 i--;
00172
00173
00174
00175
00176
00177 subjectStart = first->diff + first->queryPos;
00178 subjectEnd= i->diff + i->queryPos + wordLength_ - 1;
00179
00180
00181
00182
00183 if (subjectEnd>subjectStart)
00184 {
00185 addMatch
00186 (
00187 first->subjectNum,
00188 numBases,
00189 first->queryPos,
00190 i->queryPos + wordLength_ - 1,
00191 subjectStart,
00192 subjectEnd
00193 );
00194 }
00195 i++;
00196 }
00197
00198 first = i;
00199
00200 }
00201
00202 }
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224 #ifdef OLD_GAP
00225 void MatchAlgorithmGapped::generateMatches
00226 ( HitListVector& hitList, MatchAdder& addMatch )
00227 {
00228
00229
00230
00231 sort( hitList.begin(), hitList.end(), LessThanSubject() );
00232
00233
00234
00235 if (hitList.size()==0) return;
00236
00237
00238 int numMatches( wordLength_ );
00239
00240 HitListVector::iterator pFirstMatch( hitList.begin() );
00241
00242 for ( int i(0) ; i < ((int)hitList.size()) - 1 ; ++i )
00243 {
00244
00245
00246
00247
00248
00249
00250
00251
00252 if ( ( hitList[i+1].subjectNum == hitList[i].subjectNum )
00253 && ( hitList[i+1].diff - hitList[i].diff <=maxGap_ ))
00254 {
00255
00256 if ( numMatches == 0 )
00257 {
00258 pFirstMatch = &hitList[i];
00259
00260 numMatches = wordLength_;
00261 }
00262 numMatches += wordLength_;
00263 }
00264 else if ( numMatches != 0 )
00265 {
00266
00267 if ( numMatches >= minToProcess_ )
00268 {
00269
00270
00271
00272 sort( pFirstMatch, &hitList[i+1], LessThanQuery() );
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283 addMatch ( pFirstMatch->subjectNum,
00284 numMatches,
00285 pFirstMatch->queryPos,
00286 hitList[i].queryPos
00287 + wordLength_ - 1,
00288 pFirstMatch->diff + pFirstMatch->queryPos,
00289 hitList[i].diff + hitList[i].queryPos
00290 + wordLength_ - 1 );
00291 }
00292 numMatches = 0;
00293 }
00294
00295 }
00296
00297 if ( ( numMatches >= minToProcess_ ) && (!hitList.empty()) )
00298 {
00299 sort( pFirstMatch, hitList.end(), LessThanQuery() );
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309 addMatch ( pFirstMatch->subjectNum,
00310 numMatches,
00311 pFirstMatch->queryPos,
00312 hitList.back().queryPos
00313 + wordLength_ - 1,
00314 pFirstMatch->diff + pFirstMatch->queryPos,
00315 hitList.back().diff + hitList.back().queryPos
00316 + wordLength_ - 1
00317 );
00318 }
00319
00320 return;
00321
00322 }
00323 #endif
00324
00325
00326
00327
00328
00329
00330