00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include "SSAHAWrapper.h"
00035 #include "SequenceReaderFasta.h"
00036 #include "HashTableTranslated.h"
00037 #include "SequenceEncoder.h"
00038 #include <vector>
00039 #include <string>
00040
00041
00042
00043
00044
00045
00046
00047
00048 struct CustomHitStore : public vector<SSAHAHit>, public HitList
00049 {
00050 virtual void addHit
00051 ( const PositionInDatabase& hitPos,
00052 const SequenceOffset& queryPos )
00053 {
00054 push_back();
00055 back().seqNum = hitPos.sequence;
00056 back().seqPos = hitPos.offset;
00057 }
00058
00059 };
00060
00061
00062 class HashTablePackedCustom;
00063 static HashTableGeneric* pHashTable(NULL);
00064 static const int wordLength=5;
00065 static SequenceEncoderProtein encoder(wordLength, cerr);
00066 static CustomHitStore hits;
00067
00068 static PackedHitStore hitBuffer;
00069 static vector<int> hitStarts;
00070 static bool isPacked=false;
00071
00072 class HashTablePackedCustom : public HashTablePackedProtein
00073 {
00074
00075 public:
00076 HashTablePackedCustom( ostream& monitoringStream=cerr, string name = ""):
00077 HashTablePackedProtein( monitoringStream, name )
00078 {}
00079
00080 void convertHits( void );
00081
00082 };
00083
00084
00085
00086
00087
00088 void HashTablePackedCustom::convertHits( void )
00089
00090 {
00091
00092
00093
00094 vector<HitPacked>::iterator i(hitBuffer.begin());
00095 vector<SeqStartPos>::iterator j(seqStarts_.begin());
00096
00097 hits.reserve( hits.size() + hitBuffer.size());
00098
00099
00100
00101 while ( (j!=&seqStarts_.back()) && (i!=hitBuffer.end()) )
00102 {
00103 vector<SeqStartPos>::iterator ub(upper_bound(j,seqStarts_.end(),i->first));
00104 j=ub; j--;
00105
00106 while ((i!=hitBuffer.end())&&(i->first<*ub))
00107 {
00108
00109
00110
00111
00112
00113
00114
00115 hits.push_back();
00116 hits.back().seqNum= ub-seqStarts_.begin();
00117 hits.back().seqPos= stepLength_*(i->first - *j);
00118
00119
00120
00121
00122 i++;
00123
00124 }
00125 j=ub;
00126 }
00127
00128 }
00129
00130
00131
00132
00133
00134 const char* getSubjectName( unsigned int seqNum )
00135 {
00136 return pHashTable->getSequenceName( seqNum );
00137 }
00138
00139
00140
00141 int loadTable( const char* const tableName )
00142 {
00143 clearSearch();
00144 clearTable();
00145
00146 if (isPacked)
00147 {
00148 pHashTable = new HashTablePackedCustom( cerr, (string)tableName);
00149 }
00150 else
00151 {
00152 pHashTable = new HashTableFred( cerr, (string)tableName);
00153 }
00154
00155 pHashTable->loadHashTable();
00156
00157 assert(pHashTable->getWordLength()==wordLength);
00158 pHashTable->setMaxNumHits( 100000000 );
00159 }
00160
00161
00162 int doSearch
00163 (
00164 const char* const pWords,
00165 const int numWords,
00166 SSAHAHit** const pHits,
00167 int ** const pStarts
00168 )
00169 {
00170 clearSearch();
00171
00172 WordSequence w;
00173
00174 encoder.linkSeq(w);
00175 encoder.encode(pWords, numWords*wordLength);
00176 encoder.unlinkSeq();
00177
00178 if(w.empty())
00179 {
00180 hitStarts.push_back(-1);
00181 *pHits=NULL;
00182 *pStarts=&hitStarts[0];
00183 return -1;
00184 }
00185
00186
00187 if (isPacked)
00188 {
00189 HashTablePackedCustom* pTable
00190 = dynamic_cast<HashTablePackedCustom*>(pHashTable);
00191 assert(pTable!=NULL);
00192 pTable->setQueryProtein();
00193 for (WordSequence::iterator i(w.begin());i!=&w.back();i++)
00194 {
00195
00196 hitBuffer.clear();
00197 pTable->matchWord( *i, hitBuffer );
00198
00199 pTable->convertHits();
00200 hitStarts.push_back(hits.size());
00201 }
00202 }
00203 else
00204 {
00205 HashTableFred* pTable
00206 = dynamic_cast<HashTableFred*>(pHashTable);
00207 assert(pTable!=NULL);
00208
00209 for (WordSequence::iterator i(w.begin());i!=&w.back();i++)
00210 {
00211 pTable->matchWord( *i, hits );
00212 }
00213 hitStarts.push_back(hits.size());
00214 }
00215
00216
00217
00218
00219
00220 hitStarts.push_back(-1);
00221 *pHits=&hits[0];
00222 *pStarts=&hitStarts[0];
00223 return 0;
00224 }
00225
00226
00227 int clearSearch( void )
00228 {
00229 hits.clear();
00230 hitStarts.clear();
00231 }
00232
00233
00234 int clearTable( void )
00235 {
00236 delete pHashTable;
00237 pHashTable = NULL;
00238 }
00239
00240
00241
00242
00243