00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #ifndef INCLUDED_HashTablePacked
00032 #define INCLUDED_HashTablePacked
00033
00034
00035
00036
00037 #include "HashTableGeneric.h"
00038
00039
00040
00041
00042
00043
// Element type of the packed hit list: HashTablePacked derives from
// HashTableView<PositionPacked,HashTablePacked> and allocates its hit list
// through an Allocator<PositionPacked>.
00044 typedef unsigned int PositionPacked;
// Element type of HashTablePacked::seqStarts_.
// NOTE(review): presumably the packed position at which each sequence
// starts - confirm against hashWords().
00045 typedef unsigned int SeqStartPos;
00046
// One hit as collected during matching: (packed position, int).
// PackedHitStore::addHit fills the int from its `baseOffset` argument.
00047 typedef pair<PositionPacked, int> HitPacked;
00048 struct PackedHitStore : public vector<HitPacked>
00049 {
00050 void addHit( PositionPacked pos, int baseOffset )
00051 {
00052 push_back(HitPacked(pos,baseOffset));
00053 }
00054
00055 };
// A hit whose position and offset are both zero. Declared `static`, so every
// translation unit that includes this header gets its own copy.
00056 static const HitPacked zeroHit = HitPacked(0,0);
00057
00058
00059
00060
00061
00062
// Function object operating on vectors of HitPacked. Only the declaration is
// visible in this header; the member functions are defined elsewhere.
// HashTablePacked owns one instance (sorter_) constructed with 4 digits of
// (sizeof(PositionPacked)*8)/4 bits each.
// NOTE(review): the name and the digits/bits/counts members suggest an LSD
// radix sort keyed on the PositionPacked half of each hit - confirm against
// the implementation.
00063 class RadixSorter
00064 {
00065
00066
// Integer type used for the entries of counts_.
00067 typedef unsigned int CountInt;
00068
00069 public:
// digits: number of radix digits; bits: width of one digit in bits
// (parameter meanings taken from their names and from the member names
// digits_/bits_ - confirm against the definition).
00070 RadixSorter( const unsigned int digits, const unsigned int bits );
// Entry point; takes the vector by non-const reference.
// NOTE(review): presumably sorts `v` in place - confirm.
00071 void operator()( vector<HitPacked>& v );
// Takes the vector read-only; presumably fills counts_ - confirm.
00072 void CountDigits( const vector<HitPacked>& v );
// Presumably performs one pass for the given digit, moving hits from
// *source_ to *target_ - confirm.
00073 void SortByDigit( PositionPacked digit );
00074
00075 protected:
// Members are initialized in the order declared below, regardless of the
// order used in the constructor's initializer list.
00076 const unsigned int digits_;
00077 const unsigned int bits_;
// NOTE(review): presumably 2^bits_ (number of values one digit can take)
// and the matching bit mask - confirm.
00078 const unsigned long base_;
00079 const PositionPacked mask_;
// Scratch vector owned by the sorter.
00080 vector<HitPacked> v1_;
// One vector of counters per entry; presumably indexed [digit][value].
00081 vector< vector<CountInt> > counts_;
// Non-owning pointers to the current input and output vectors of a pass
// (assumed from the names - confirm).
00082 vector<HitPacked>* source_;
00083 vector<HitPacked>* target_;
// Iterators into a vector<HitPacked>; presumably the next write position
// for each digit value - confirm.
00084 vector< vector<HitPacked>::iterator > places_;
00085
00086 };
00087
00088 class LessThanDiff
00089 {
00090 public:
00091 bool operator()( const HitInfo& lhs, const HitInfo& rhs ) const
00092 {
00093 return ( lhs.diff < rhs.diff );
00094 }
00095 };
00096
00097
// Substitute-word generators. Both have the signature required by
// HashTablePacked::GenerateSubstitutesPointer; generateSubstitutesDNA is the
// value HashTablePacked's constructor installs in pGenerateSubstitutes_.
// Declared here, defined elsewhere.
// NOTE(review): presumably each fills `subs` with the words obtained from
// `w` (of length `wordLength`) by substituting one symbol - confirm against
// the definitions.
00098 void generateSubstitutesDNA
00099 (Word w, vector<Word>& subs, int wordLength);
00100
00101 void generateSubstitutesProtein
00102 (Word w, vector<Word>& subs, int wordLength);
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
// Flattened table of substitute codes, grouped per source code. The group
// for code c occupies subVals[ subStarts[c] ] .. subVals[ subStarts[c+1]-1 ];
// codes with no substitutes have an empty group. The per-row comments below
// give the source code and the index range, derived from subStarts.
// The relation is symmetric (e.g. 16 is listed for code 1 and 1 for code 16).
// NOTE(review): the groups mirror the letter pairs of the disabled
// POSITIVE_SCORING_BLOSUM_SUBS table further down with the B and Z entries
// removed (e.g. 1<->16 matches AS/SA), so the codes are presumably residue
// codes - confirm the encoding against the Word definition.
// Being a namespace-scope const array in a header, each including
// translation unit gets its own copy.
00127 const Word subVals[] =
00128 {
00129
00130 16, // code 1: [0]
00131
00132
00133 4, 12, // code 3: [1..2]
00134 3, 9, 14, // code 4: [3..5]
00135 19, 21, // code 5: [6..7]
00136
00137 12, 21, // code 7: [8..9]
00138 10, 11, 18, // code 8: [10..12]
00139 4, 14, 15, // code 9: [13..15]
00140 8, 11, 18, // code 10: [16..18]
00141 8, 10, 18, // code 11: [19..21]
00142 3, 7, 16, // code 12: [22..24]
00143
00144 4, 9, 15, // code 14: [25..27]
00145 9, 14, // code 15: [28..29]
00146 1, 12, 17, // code 16: [30..32]
00147 16, // code 17: [33]
00148 8, 10, 11, // code 18: [34..36]
00149 5, 21, // code 19: [37..38]
00150 5, 7, 19, // code 21: [39..41]
00151
// Index 42, the position the last subStarts entry points at; not part of
// any group. NOTE(review): looks like an end-of-table sentinel - confirm.
00152 99999
00153 };
00154
// Offsets into subVals. Entry c is the index of the first substitute listed
// for code c, and entry c+1 is one past the last, so the group size is
// subStarts[c+1] - subStarts[c] (zero for codes without substitutes).
// There are 23 entries: one per code 0..21 plus a final entry (42) marking
// the end of the last group.
// NOTE(review): presumably consumed by generateSubstitutesProtein - the
// reader is not visible in this header.
00155 const int subStarts[] =
00156 {
00157 0, // code 0: empty
00158 0, // code 1: 1 entry
00159 1, // code 2: empty
00160 1, // code 3: 2 entries
00161 3, // code 4: 3 entries
00162 6, // code 5: 2 entries
00163 8, // code 6: empty
00164 8, // code 7: 2 entries
00165 10, // code 8: 3 entries
00166 13, // code 9: 3 entries
00167 16, // code 10: 3 entries
00168 19, // code 11: 3 entries
00169 22, // code 12: 3 entries
00170 25, // code 13: empty
00171 25, // code 14: 3 entries
00172 28, // code 15: 2 entries
00173 30, // code 16: 3 entries
00174 33, // code 17: 1 entry
00175 34, // code 18: 3 entries
00176 37, // code 19: 2 entries
00177 39, // code 20: empty
00178 39, // code 21: 3 entries
00179 42 // end of the last group (index of the trailing 99999 in subVals)
00180 };
00181
00182
00183 #ifdef POSITIVE_SCORING_BLOSUM_SUBS
00184 Original table: generated from BLOSUM62 table in MatchStore.h
00185 AS: 1
00186 BD: 4 BE: 1 BN: 3 BZ: 1
00187 DB: 4 DE: 2 DN: 1 DZ: 1
00188 EB: 1 ED: 2 EK: 1 EQ: 2 EZ: 4
00189 FW: 1 FY: 3
00190 HN: 1 HY: 2
00191 IL: 2 IM: 1 IV: 3
00192 KE: 1 KQ: 1 KR: 2 KZ: 1
00193 LI: 2 LM: 2 LV: 1
00194 MI: 1 ML: 2 MV: 1
00195 NB: 3 ND: 1 NH: 1 NS: 1
00196 QE: 2 QK: 1 QR: 1 QZ: 3
00197 RK: 2 RQ: 1
00198 SA: 1 SN: 1 ST: 1
00199 TS: 1
00200 VI: 3 VL: 1 VM: 1
00201 WF: 1 WY: 2
00202 YF: 3 YH: 2 YW: 2
00203 ZB: 1 ZD: 1 ZE: 4 ZK: 1 ZQ: 3
00204 #endif
00205
00206
00207
00208
00209
00210
00211
00212
00213 class HashTablePacked :
00214 public HashTableView<PositionPacked,HashTablePacked>
00215 {
00216 friend class HashTableTranslated;
00217 public:
00218 static AllocatorLocal<PositionInHitList> defaultArrayAllocator;
00219 static AllocatorLocal<PositionPacked> defaultHitListAllocator;
00220
00221 typedef void (HashTablePacked::* MatchSequencePointer)
00222 (WordSequence&, HitList&);
00223
00224 typedef void (HashTablePacked::* MatchWordPointer)
00225 (Word, PackedHitStore&, int);
00226
00227 typedef void (* GenerateSubstitutesPointer)
00228 (Word, vector<Word>&, int);
00229
00230 HashTablePacked( ostream& monitoringStream=cerr,
00231 string name="",
00232 Allocator<PositionPacked>& hitListAllocator
00233 = defaultHitListAllocator,
00234 Allocator<PositionInHitList>& arrayAllocator
00235 = defaultArrayAllocator ):
00236 HashTableView<PositionPacked,HashTablePacked>
00237 (monitoringStream, name, hitListAllocator, arrayAllocator),
00238 wordNum_(0),
00239 pMatchSequence_(&HashTablePacked::matchSequenceStandard),
00240 pMatchWord_(&HashTablePacked::matchWordStandard),
00241 pGenerateSubstitutes_(&generateSubstitutesDNA),
00242 numRepeats_(0),
00243 substituteThreshold_(0),
00244 sorter_(4,(sizeof(PositionPacked)*8)/4)
00245 {
00246 hitListFormat_ = g32BitPacked;
00247 seqStarts_.push_back(0);
00248 monitoringStream_ << "constructing HashTablePacked\n";
00249
00250
00251
00252 }
00253
00254
00255
00256
00257
00258 inline static SequenceNumber getSequence( const_iterator i );
00259 inline static SequenceOffset getOffset( const_iterator i );
00260
00261
00262 virtual void hashWords
00263 ( SequenceAdapter& thisSeq, SequenceNumber seqNum );
00264 virtual void countWords( SequenceAdapter& thisSeq );
00265
00266 virtual void matchSequence
00267 ( WordSequence& seq, HitList& hitListFwd )
00268 { (this->*pMatchSequence_)(seq,hitListFwd); }
00269
00270
00271 virtual void convertHits
00272 ( PackedHitStore& packedHits, HitList& hitListFwd );
00273
00274 void matchWordDeluxe( Word w, PackedHitStore& hitList, int offset )
00275 {
00276 (this->*pMatchWord_)(w, hitList, offset);
00277 }
00278
00279
00280
00281 virtual void loadHitList( unsigned long size );
00282
00283 virtual void saveHitList( void );
00284
00285 virtual void setNumRepeats(int numRepeats);
00286 virtual void setSubstituteThreshold( int ns );
00287
00288 void matchSequenceStandard
00289 ( WordSequence& seq, HitList& hitListFwd );
00290
00291 void matchSequenceRepeated
00292 ( WordSequence& seq, HitList& hitListFwd );
00293
00294 void matchWordStandard( Word w, PackedHitStore& hitList, int offset )
00295 {
00296 HashTableView<PositionPacked,HashTablePacked>::matchWord
00297 ( w, hitList, offset );
00298 }
00299
00300 void matchWordSubstitute( Word w, PackedHitStore& hitList, int offset );
00301
00302 protected:
00303 vector<SeqStartPos> seqStarts_;
00304 unsigned int wordNum_;
00305 int numRepeats_;
00306 int substituteThreshold_;
00307 MatchSequencePointer pMatchSequence_;
00308 MatchWordPointer pMatchWord_;
00309 GenerateSubstitutesPointer pGenerateSubstitutes_;
00310
00311 RadixSorter sorter_;
00312
00313 };
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325 #endif
00326
00327