This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
Data Structures | |
| struct | indexCoordinate |
| struct | wordList |
| struct | queryWord |
Functions | |
| void | index_initializeBuild (uint4 fromCodeword, uint4 toCodeword) |
| void | index_addSubject (unsigned char *subject, uint4 subjectLength, uint4 fromCodeword, uint4 toCodeword) |
| void | index_finishBuild (uint4 fromCodeword, uint4 toCodeword) |
| uint4 * | index_wordOffsetPositions () |
| uint4 | index_numWordOffsets (uint4 codeword) |
| unsigned char * | index_wordOffsets (uint4 codeword) |
| void | index_processQuery (unsigned char *startIndex, struct PSSMatrix PSSMatrix, uint4 collectionSize) |
| indexCoordinate * | index_getFirstCoordinate () |
| indexCoordinate * | index_getNextCoordinate () |
| uint4 | index_generateCodeword (unsigned char *word, uint4 wordSize) |
| void | index_print () |
Variables | |
| uint4 | index_wordSize |
| uint4 | index_intervalSize |
| indexCoordinate ** | index_sequenceCoordinates |
| indexCoordinate * | index_coordinates |
| uint4 | index_numCoordinates |
| uint4 * | index_sequencePositions |
| uint4 * | index_descriptionLocations |
| void index_addSubject | ( | unsigned char * | subject, | |
| uint4 | subjectLength, | |||
| uint4 | fromCodeword, | |||
| uint4 | toCodeword | |||
| ) |
Definition at line 77 of file index.c.
References index_addWord(), index_generateCodeword(), index_intervalSize, index_subjectNumber, index_wordSize, and uint4.
Referenced by main().
00078 { 00079 uint4 codeword, wordOffset; 00080 00081 // Add subjectNumber and offset to lists for each word 00082 wordOffset = 0; 00083 while (wordOffset + index_wordSize - 1 < subjectLength) 00084 { 00085 // For this word 00086 codeword = index_generateCodeword(subject + wordOffset, index_wordSize); 00087 00088 // If it is in the range of codewords we are considering 00089 if (codeword >= fromCodeword && codeword < toCodeword) 00090 { 00091 // Record subject number and offset of word in index 00092 index_addWord(codeword, index_subjectNumber, wordOffset / index_intervalSize);// + (index_wordSize - 4)); 00093 } 00094 00095 // Index every Nth word 00096 wordOffset += index_intervalSize; 00097 } 00098 00099 index_subjectNumber++; 00100 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void index_finishBuild | ( | uint4 | fromCodeword, | |
| uint4 | toCodeword | |||
| ) |
Definition at line 58 of file index.c.
References index_words, wordList::offsets, and uint4.
Referenced by main().
00059 { 00060 uint4 codeword; 00061 00062 // For each word 00063 codeword = fromCodeword; 00064 while (codeword < toCodeword) 00065 { 00066 // Free list 00067 free(index_words[codeword].offsets); 00068 codeword++; 00069 } 00070 00071 // Free the lists 00072 index_words += fromCodeword; 00073 free(index_words); 00074 }
Here is the caller graph for this function:

| uint4 index_generateCodeword | ( | unsigned char * | word, | |
| uint4 | wordSize | |||
| ) |
| struct indexCoordinate* index_getFirstCoordinate | ( | ) | [read] |
Definition at line 376 of file index.c.
References index_currentCoordinate, and index_getNextCoordinate().
00377 { 00378 // Reset counters 00379 index_currentCoordinate = 0; 00380 00381 // Get coordinate 00382 return index_getNextCoordinate(); 00383 }
Here is the call graph for this function:

| struct indexCoordinate* index_getNextCoordinate | ( | ) | [read] |
Definition at line 386 of file index.c.
References index_coordinates, index_currentCoordinate, and index_numCoordinates.
Referenced by index_getFirstCoordinate().
00387 { 00388 struct indexCoordinate* coordinate; 00389 00390 if (index_currentCoordinate >= index_numCoordinates) 00391 return NULL; 00392 00393 // Get current coordinate and return it 00394 coordinate = index_coordinates + index_currentCoordinate; 00395 index_currentCoordinate++; 00396 00397 return coordinate; 00398 }
Here is the caller graph for this function:

| void index_initializeBuild | ( | uint4 | fromCodeword, | |
| uint4 | toCodeword | |||
| ) |
Definition at line 32 of file index.c.
References wordList::allocated, global_malloc(), index_subjectNumber, index_words, wordList::lastOffset, wordList::lastSequenceNumber, wordList::length, wordList::offsets, and uint4.
Referenced by main().
00033 { 00034 uint4 codeword; 00035 00036 // index_numWords = pow(4, index_wordSize); 00037 index_words = (struct wordList*)global_malloc(sizeof(struct wordList) * (toCodeword - fromCodeword)); 00038 index_words -= fromCodeword; 00039 00040 // For each word 00041 codeword = fromCodeword; 00042 while (codeword < toCodeword) 00043 { 00044 // Initialize list of occurrences 00045 index_words[codeword].offsets = NULL; 00046 index_words[codeword].length = 0; 00047 index_words[codeword].allocated = 0; 00048 index_words[codeword].lastOffset = 0; 00049 index_words[codeword].lastSequenceNumber = 0; 00050 00051 codeword++; 00052 } 00053 00054 index_subjectNumber = 0; 00055 }
Here is the call graph for this function:

Here is the caller graph for this function:

| uint4 index_numWordOffsets | ( | uint4 | codeword | ) |
Definition at line 144 of file index.c.
References index_words, and wordList::length.
Referenced by main().
00145 { 00146 return index_words[codeword].length; 00147 }
Here is the caller graph for this function:

| void index_print | ( | ) |
Definition at line 462 of file index.c.
References index_numWords, index_words, wordList::length, wordList::offsets, uint4, and vbyte_getVbyte.
00463 { 00464 uint4 codeword = 0; 00465 struct wordList* wordList; 00466 unsigned char* offsets, *endOffsets; 00467 uint4 offsetGap, offset, numOffsets; 00468 uint4 totalSize = 0; 00469 00470 while (codeword < index_numWords) 00471 { 00472 numOffsets = 0; offset = 0; 00473 00474 wordList = index_words + codeword; 00475 00476 offsets = wordList->offsets; 00477 endOffsets = offsets + wordList->length; 00478 00479 totalSize += wordList->length; 00480 00481 if (offsets < endOffsets) 00482 printf("\nCodeword=%u:", codeword); 00483 00484 while (offsets < endOffsets) 00485 { 00486 vbyte_getVbyte(offsets, (&offsetGap)); 00487 offset += offsetGap; 00488 printf(" %u", offset); 00489 numOffsets++; 00490 } 00491 00492 // printf("[%d/%d = %f]\n", wordList->length, numOffsets, (float)(wordList->length) / (float)numOffsets); 00493 00494 codeword++; 00495 } 00496 00497 printf("\nTotal table size=%d bytes\n", totalSize); 00498 }
| void index_processQuery | ( | unsigned char * | startIndex, | |
| struct PSSMatrix | PSSMatrix, | |||
| uint4 | collectionSize | |||
| ) |
Definition at line 161 of file index.c.
References alignments_compareCodeword(), alignments_compareQueryPosition(), PSSMatrix::bestMatchCodes, blast_numHits, queryWord::codeword, encoding_numRegularLetters, queryWord::endOffsets, global_malloc(), index_coordinates, index_descriptionLocations, index_generateCodeword(), index_intervalSize, index_loadedWords, index_numCoordinates, index_numWords, index_offsets, index_sequenceCoordinates, index_sequencePositions, index_wordSize, PSSMatrix::length, memBlocks_free(), memBlocks_getCurrent(), memBlocks_initialize(), memBlocks_newEntry(), memBlocks_resetCurrent(), memBlocks::numTotalEntries, queryWord::offsets, wordList::offsets, PSSMatrix::queryCodes, indexCoordinate::queryOffset, queryWord::queryPosition, PSSMatrix::strandLength, indexCoordinate::subjectNumber, indexCoordinate::subjectOffset, uint4, and vbyte_getVbyte.
00163 { 00164 uint4 queryPosition, codeword = 0, queryPosition4; 00165 unsigned char* offsets, *endOffsets; 00166 uint4 offsetGap, offset, sequenceGap, sequenceNumber; 00167 struct indexCoordinate* coordinate; 00168 struct memBlocks* unsortedCoordinates; 00169 uint4 *numSubjectHits, numQueryPositions, queryWordCount, numOffsets; 00170 uint4 time, wordPosition, containsWildcard; 00171 struct queryWord* queryWords; 00172 00173 // Read word and interval size from start of index 00174 vbyte_getVbyte(startIndex, &index_wordSize); 00175 vbyte_getVbyte(startIndex, &index_intervalSize); 00176 00177 index_numWords = pow(4, index_wordSize); 00178 index_sequencePositions = (uint4*)startIndex; 00179 index_descriptionLocations = index_sequencePositions + numSequences; 00180 index_loadedWords = index_descriptionLocations + numSequences; 00181 index_offsets = (unsigned char*)(index_loadedWords + index_numWords + 1); 00182 00183 time = clock(); 00184 unsortedCoordinates = memBlocks_initialize(sizeof(struct indexCoordinate), numSequences); 00185 00186 // Declare and initialize array for count number of hits for each sequence 00187 numSubjectHits = (uint*)global_malloc(sizeof(uint4) * numSequences); 00188 sequenceNumber = 0; 00189 while (sequenceNumber < numSequences) 00190 { 00191 numSubjectHits[sequenceNumber] = 0; 00192 sequenceNumber++; 00193 } 00194 00195 // Memory to hold offsets string for each query word 00196 numQueryPositions = PSSMatrix.length - index_wordSize + 1; 00197 queryWords = (struct queryWord*)global_malloc(sizeof(struct queryWord) * numQueryPositions); 00198 00199 // For each word in the query 00200 queryPosition = 0; 00201 while (queryPosition < numQueryPositions) 00202 { 00203 // Check if the word contains a wildcard 00204 containsWildcard = 0; wordPosition = 0; 00205 while (wordPosition < index_wordSize) 00206 { 00207 if (PSSMatrix.queryCodes[queryPosition + wordPosition] >= encoding_numRegularLetters) 00208 containsWildcard = 1; 00209 00210 wordPosition++; 00211 } 00212 00213 // Don't include words that cross the strand boundry or contain wildcards 00214 if (!containsWildcard && !(queryPosition < PSSMatrix.strandLength && 00215 queryPosition >= PSSMatrix.strandLength - index_wordSize + 1)) 00216 { 00217 // printf("--Query position=%d\n", queryPosition); 00218 00219 // Get the codeword 00220 codeword = index_generateCodeword(PSSMatrix.bestMatchCodes + queryPosition, index_wordSize); 00221 00222 // Get wordlist for that codeword 00223 offsets = index_offsets + index_loadedWords[codeword]; 00224 endOffsets = index_offsets + index_loadedWords[codeword + 1]; 00225 00226 queryWords[queryPosition].offsets = offsets; 00227 queryWords[queryPosition].endOffsets = endOffsets; 00228 queryWords[queryPosition].queryPosition = queryPosition; 00229 queryWords[queryPosition].codeword = codeword; 00230 00231 // printf("codeword=%d start=%d end=%d numHits=%d\n", codeword, index_loadedWords[codeword], 00232 // index_loadedWords[codeword + 1], endOffsets - offsets); 00233 } 00234 else 00235 { 00236 queryWords[queryPosition].offsets = NULL; 00237 queryWords[queryPosition].endOffsets = NULL; 00238 queryWords[queryPosition].queryPosition = queryPosition; 00239 queryWords[queryPosition].codeword = codeword; 00240 } 00241 00242 // printf("\n"); 00243 queryPosition++; 00244 } 00245 00246 // Sort the query words by codeword 00247 qsort(queryWords, numQueryPositions, sizeof(struct queryWord), alignments_compareCodeword); 00248 00249 // For each query word 00250 queryWordCount = 0; 00251 while (queryWordCount < numQueryPositions) 00252 { 00253 // Ignoring those that cross the strand boundry 00254 if (queryWords[queryWordCount].offsets != NULL) 00255 { 00256 // Make in-memory copy of list of offsets 00257 numOffsets = queryWords[queryWordCount].endOffsets - queryWords[queryWordCount].offsets; 00258 offsets = (char*)global_malloc(sizeof(char) * numOffsets); 00259 00260 memcpy(offsets, queryWords[queryWordCount].offsets, numOffsets); 00261 queryWords[queryWordCount].offsets = offsets; 00262 queryWords[queryWordCount].endOffsets = offsets + numOffsets; 00263 } 00264 00265 queryWordCount++; 00266 } 00267 00268 // Sort the query words by query position 00269 qsort(queryWords, numQueryPositions, sizeof(struct queryWord), alignments_compareQueryPosition); 00270 00271 queryPosition = 0; 00272 while (queryPosition < numQueryPositions) 00273 { 00274 // Ignoring those that cross the strand boundry 00275 if (queryWords[queryPosition].offsets != NULL) 00276 { 00277 offsets = queryWords[queryPosition].offsets; 00278 endOffsets = queryWords[queryPosition].endOffsets; 00279 offset = 0; 00280 sequenceNumber = 0; 00281 queryPosition4 = queryPosition + (index_wordSize - 4); 00282 00283 // Traverse the offsets 00284 while (offsets < endOffsets) 00285 { 00286 vbyte_getVbyte(offsets, (&sequenceGap)); 00287 vbyte_getVbyte(offsets, (&offsetGap)); 00288 00289 // printf("[%d,%d]\n", sequenceGap, offsetGap); 00290 00291 if (sequenceGap > 0) 00292 { 00293 offset = offsetGap; 00294 sequenceNumber += sequenceGap; 00295 } 00296 else 00297 { 00298 offset += offsetGap; 00299 } 00300 // printf(" %u", offset); 00301 00302 // Add query/database coordinate of match to relevant bucket 00303 // printf("Sequence number=%d\n", sequenceNumber); 00304 coordinate = (struct indexCoordinate*)memBlocks_newEntry(unsortedCoordinates); 00305 coordinate->queryOffset = queryPosition4; 00306 coordinate->subjectOffset = offset * index_intervalSize + (index_wordSize - 4); 00307 coordinate->subjectNumber = sequenceNumber; 00308 00309 numSubjectHits[sequenceNumber]++; 00310 // printf("[%d,%d]\n", queryPosition, offset); 00311 00312 blast_numHits++; 00313 } 00314 00315 free(queryWords[queryPosition].offsets); 00316 } 00317 00318 queryPosition++; 00319 } 00320 00321 00322 printf("Time to process query=%f\n", (float)(clock() - time) / CLOCKS_PER_SEC); 00323 time = clock(); 00324 00325 // Make memory for sorted list 00326 index_numCoordinates = unsortedCoordinates->numTotalEntries; 00327 index_coordinates = (struct indexCoordinate*)global_malloc( 00328 sizeof(struct indexCoordinate) * index_numCoordinates); 00329 index_sequenceCoordinates = (struct indexCoordinate**)global_malloc( 00330 sizeof(struct indexCoordinate*) * numSequences); 00331 00332 // For each sequence 00333 coordinate = index_coordinates; 00334 sequenceNumber = 0; 00335 while (sequenceNumber < numSequences) 00336 { 00337 // If it has hits 00338 if (numSubjectHits[sequenceNumber] != 0) 00339 { 00340 // Point to location in sorted list of coordinates 00341 index_sequenceCoordinates[sequenceNumber] = coordinate; 00342 coordinate += numSubjectHits[sequenceNumber]; 00343 00344 numSubjectHits[sequenceNumber] = 0; 00345 } 00346 sequenceNumber++; 00347 } 00348 00349 // Move through list of unsorted coordinates 00350 memBlocks_resetCurrent(unsortedCoordinates); 00351 while ((coordinate = memBlocks_getCurrent(unsortedCoordinates)) != NULL) 00352 { 00353 sequenceNumber = coordinate->subjectNumber; 00354 // printf("%d,%d=[%d]\n", index_sequenceCoordinates[sequenceNumber], numSubjectHits[sequenceNumber], sequenceNumber); 00355 // Place into sorted list 00356 index_sequenceCoordinates[sequenceNumber][numSubjectHits[sequenceNumber]] = *coordinate; 00357 numSubjectHits[sequenceNumber]++; 00358 } 00359 00360 memBlocks_free(unsortedCoordinates); 00361 00362 /* // Print sorted coordinates 00363 coordinate = index_coordinates; 00364 while (coordinate < index_coordinates + index_numCoordinates) 00365 { 00366 printf("[%d]", coordinate); 00367 printf("Subject %d Offset %d,%d\n", coordinate->subjectNumber, coordinate->queryOffset, 00368 coordinate->subjectOffset); 00369 coordinate++; 00370 }*/ 00371 00372 printf("Time to sort buckets=%f\n", (float)(clock() - time) / CLOCKS_PER_SEC); 00373 }
Here is the call graph for this function:

| uint4* index_wordOffsetPositions | ( | ) |
| unsigned char* index_wordOffsets | ( | uint4 | codeword | ) |
Definition at line 150 of file index.c.
References index_words, and wordList::offsets.
Referenced by main().
00151 { 00152 return index_words[codeword].offsets; 00153 }
Here is the caller graph for this function:

| struct indexCoordinate* index_coordinates |
Definition at line 39 of file index.h.
Referenced by index_getNextCoordinate(), and index_processQuery().
| uint4* index_descriptionLocations |
| uint4 index_intervalSize |
Definition at line 19 of file index.c.
Referenced by index_addSubject(), index_processQuery(), and main().
| uint4 index_numCoordinates |
Definition at line 40 of file index.h.
Referenced by index_getNextCoordinate(), and index_processQuery().
| struct indexCoordinate** index_sequenceCoordinates |
| uint4* index_sequencePositions |
| uint4 index_wordSize |
Definition at line 19 of file index.c.
Referenced by index_addSubject(), index_processQuery(), and main().
1.5.2