#include "blast.h"#include "vbyte2.h"#include "readindex.h"#include "vec.h"#include "vbyte2.c"#include "readindex.c"#include "vec.c"#include "identityAlign.c"Include dependency graph for rsdb.c:

Go to the source code of this file.
Data Structures | |
| struct | longList |
| struct | sequenceLists |
| struct | diagonal |
| struct | sequenceMatch |
| struct | sequence |
| struct | parent |
Defines | |
| #define | numHashtablePasses 50 |
| #define | wildcardSumWindow 100 |
| #define | percentMatchesRequired 1 |
| #define | rsdb_threshold 90 |
Functions | |
| int4 | rsdb_compareDescriptionLocations (const void *sequence1, const void *sequence2) |
| int4 | rsdb_compareSequenceLengths (const void *sequence1, const void *sequence2) |
| void | rsdb_printSequence (uint4 whitespace, unsigned char *sequence, uint4 length) |
| void | rsdb_processSequenceMatches (struct memSingleBlock *sequenceMatches, struct sequence *sequences) |
| void | rsdb_processList (struct memSingleBlock *diagonals, struct wordLocation *wordLocations, uint4 numWordLocations) |
| void | rsdb_printCluster (struct parent *parent) |
| uint4 | rsdb_score (struct sequence *seq1, struct sequence *seq2, int4 relativeOffset, float wildscoreAllowed) |
| void | rsdb_newParent (struct parent *newParent, struct sequence *child) |
| void | rsdb_addChild (struct parent *parent, struct sequence *child, int4 relativeOffset) |
| wordLocation * | rsdb_mergeLists (struct wordLocation *wordLocations1, uint4 *numWordLocations1, struct wordLocation *wordLocations2, uint4 *numWordLocations2, int4 mergeRelativeOffset, uint *lengthNewList) |
| void | rsdb_printList (struct wordLocation *list, uint4 length) |
| int4 | rsdb_calculateSavings () |
| void | rsdb_writeClusters (char *filename, struct sequence *sequences) |
| void | rsdb_freePSSM () |
| void | rsdb_spexClusterSequences (struct sequence *sequences, uint4 numberOfSequences, uint4 numberOfLetters) |
| void | rsdb_updateDiagonalMatches (struct memSingleBlock *diagonals, struct wordLocation location1, struct wordLocation location2) |
| int4 | main (int4 argc, char *argv[]) |
| int4 | rsdb_compareListLengths (const void *list1, const void *list2) |
| Hashcounter * | rsdb_slottedSpex (struct sequence *sequences, uint4 numberOfSequences, uint4 numberOfLetters) |
| Hashcounter * | rsdb_winnowingSpex (struct sequence *sequences, uint4 numberOfSequences, uint4 numberOfLetters) |
Variables | |
| uint4 | decoWordLength = 30 |
| uint4 | numIterations = 3 |
| uint4 | decoQantum = 9 |
| uint4 | maxListLength = 20 |
| float | rsdb_averageMatchScore = 0 |
| float * | rsdb_averageWildcodeScores = NULL |
| float | wildcardsThreshold = 0.25 |
| uint4 | rsdb_numMergeDiagonals |
| memBlocks * | rsdb_parents |
| uint4 | rsdb_numParents = 0 |
| uint4 ** | rsdb_PSSM |
| int4 | rsdb_PSSMstart |
| int4 | rsdb_PSSMend |
| int4 | rsdb_PSSMlength |
| uint4 | rsdb_accumulatorsSize = 0 |
| #define numHashtablePasses 50 |
Definition at line 23 of file rsdb.c.
Referenced by cluster_spexClusterSequences(), and rsdb_spexClusterSequences().
| #define rsdb_threshold 90 |
| int4 main | ( | int4 | argc, | |
| char * | argv[] | |||
| ) |
Definition at line 123 of file rsdb.c.
References scoreMatrix::averageMatchScore, decoQantum, decoWordLength, sequence::descriptionLength, sequence::descriptionLocation, encoding_alphabetTypes, encoding_free(), encoding_initialize(), encoding_protein, global_free(), global_malloc(), identityAlign_free(), sequence::length, maxListLength, memBlocks_free(), memBlocks_initialize(), sequence::number, numIterations, parameters_findScoringMatrix(), parameters_free(), parameters_scoringMatrixPath, sequence::parent, readdb_close(), readdb_dbAlphabetType, readdb_longestSequenceLength, readdb_numberOfClusters, readdb_numberOfLetters, readdb_numberOfSequences, readdb_open(), readdb_readSequence(), rsdb_averageMatchScore, rsdb_averageWildcodeScores, rsdb_calculateSavings(), rsdb_compareSequenceLengths(), rsdb_parents, rsdb_spexClusterSequences(), rsdb_writeClusters(), scoreMatrix_free(), scoreMatrix_load(), sequence::sequence, wordLocation::sequenceNumber, uint4, wildcards_initialize(), wildcards_readWildcards(), and wildcards_scoreMatrix.
00124 { 00125 unsigned char *filename, *sequence; 00126 uint4 descriptionStart = 0, descriptionLength = 0, sequenceLength; 00127 uint4 sequenceNumber = 0, encodedLength; 00128 struct sequence* sequences; 00129 00130 // User must provide FASTA format file at command line 00131 if (argc != 2 && argc != 6) 00132 { 00133 fprintf(stderr, "Useage: rsdb <database> [decoWordLength] [numIterations] [decoQantum] [maxListLength]\n"); 00134 exit(-1); 00135 } 00136 filename = argv[1]; 00137 00138 if (argc == 6) 00139 { 00140 decoWordLength = atoi(argv[2]); 00141 numIterations = atoi(argv[3]); 00142 decoQantum = atoi(argv[4]); 00143 maxListLength = atoi(argv[5]); 00144 } 00145 00146 // Open formatted file for reading 00147 readdb_open(filename); 00148 00149 printf("Number of sequences = %u\n", readdb_numberOfSequences); 00150 printf("Total number of letters = %llu\n", readdb_numberOfLetters); 00151 printf("Length of longest sequence = %u\n", readdb_longestSequenceLength); 00152 printf("Alphabet type = %s\n", encoding_alphabetTypes[readdb_dbAlphabetType]); 00153 00154 if (readdb_dbAlphabetType != encoding_protein) 00155 { 00156 fprintf(stderr, "Error: Only protein collection clustering currently supported\n"); 00157 exit(-1); 00158 } 00159 00160 if (readdb_numberOfSequences != readdb_numberOfClusters) 00161 { 00162 fprintf(stderr, "Error clustering %s: collection has already been clustered\n", filename); 00163 exit(-1); 00164 } 00165 00166 sequences = (struct sequence*)global_malloc(sizeof(struct sequence) * readdb_numberOfSequences); 00167 00168 // Initialize codes array 00169 encoding_initialize(readdb_dbAlphabetType); 00170 00171 // Initialize cluster wildcards 00172 wildcards_initialize(); 00173 00174 if (argc == 5) 00175 { 00176 // Read in a set of wildcards 00177 wildcards_readWildcards(argv[4]); 00178 } 00179 00180 // Load score matrix 00181 parameters_findScoringMatrix(); 00182 wildcards_scoreMatrix = scoreMatrix_load(parameters_scoringMatrixPath); 00183 00184 // Calculate average score for matching two residues 00185 rsdb_averageMatchScore = wildcards_scoreMatrix.averageMatchScore; 00186 00187 // Move through index file reading sequence codes and converting back to ASCII sequences 00188 sequenceNumber = 0; 00189 while (readdb_readSequence(&sequence, &sequenceLength, &descriptionStart, 00190 &descriptionLength, &encodedLength)) 00191 { 00192 // Record sequence, sequence length, number, no parent 00193 sequences[sequenceNumber].sequence = sequence; 00194 sequences[sequenceNumber].length = sequenceLength; 00195 sequences[sequenceNumber].number = sequenceNumber; 00196 sequences[sequenceNumber].parent = NULL; 00197 sequences[sequenceNumber].descriptionLength = descriptionLength; 00198 sequences[sequenceNumber].descriptionLocation = descriptionStart; 00199 00200 sequenceNumber++; 00201 } 00202 00203 if (sequenceNumber != readdb_numberOfSequences) 00204 { 00205 fprintf(stderr, "Error: only %d sequences read\n", sequenceNumber); 00206 exit(-1); 00207 } 00208 00209 printf("%d sequences read.\n", sequenceNumber); 00210 fflush(stdout); 00211 00212 // Sort sequences by length 00213 qsort(sequences, readdb_numberOfSequences, sizeof(struct sequence), 00214 rsdb_compareSequenceLengths); 00215 00216 // Update sequence numbers 00217 sequenceNumber = 0; 00218 while (sequenceNumber < readdb_numberOfSequences) 00219 { 00220 sequences[sequenceNumber].number = sequenceNumber; 00221 sequenceNumber++; 00222 } 00223 00224 // Initialize array for storing parents 00225 rsdb_parents = memBlocks_initialize(sizeof(struct parent), 10000); 00226 00227 // Simple approach to clustering sequences 00228 // rsdb_simpleClusterSequences(sequences, sequenceNumber, readdb_numberOfLetters); 00229 00230 // Perform SPEX algorithm and cluster sequences 00231 rsdb_spexClusterSequences(sequences, sequenceNumber, readdb_numberOfLetters); 00232 00233 // Cluster sequences in the collection 00234 // rsdb_clusterSequences(sequences, sequenceNumber, readdb_numberOfLetters, filename); 00235 00236 printf("Total bytes saved=%d\n", rsdb_calculateSavings()); fflush(stdout); 00237 00238 // printf("END Total malloced=%s\n", global_int4toString(global_totalMalloc)); fflush(stdout); 00239 00240 // Load sequence descriptions from disk 00241 // rsdb_loadDescriptions(sequences); 00242 00243 // Write clusters to disk 00244 rsdb_writeClusters(filename, sequences); 00245 00246 // Close reading formatted database 00247 readdb_close(); 00248 00249 free(rsdb_averageWildcodeScores); 00250 free(sequences); 00251 memBlocks_free(rsdb_parents); 00252 encoding_free(); 00253 global_free(); 00254 scoreMatrix_free(wildcards_scoreMatrix); 00255 parameters_free(); 00256 identityAlign_free(); 00257 return 0; 00258 }
Here is the call graph for this function:

Definition at line 1221 of file rsdb.c.
References parent::children, global_realloc(), parent::length, sequence::length, parent::numChildren, sequence::parent, and sequence::regionStart.
Referenced by rsdb_spexClusterSequences().
01222 { 01223 // If child is longer than parent 01224 if (child->length > parent->length) 01225 return; 01226 01227 // Add new child 01228 child->regionStart = 0; 01229 // child->length = parent->length; 01230 child->parent = parent; 01231 // child->sequence = parent->sequence; 01232 parent->numChildren++; 01233 parent->children = (struct sequence**)global_realloc(parent->children, 01234 sizeof(struct sequence*) * parent->numChildren); 01235 parent->children[parent->numChildren - 1] = child; 01236 01237 // printf("Added %d: \n", child->number); 01238 // rsdb_printCluster(parent); 01239 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int4 rsdb_calculateSavings | ( | ) |
Definition at line 1120 of file rsdb.c.
References parent::children, int4, parent::length, sequence::length, memBlocks_getCurrent(), memBlocks_resetCurrent(), sequence::number, parent::numChildren, sequence::parent, and rsdb_parents.
Referenced by main().
01121 { 01122 int4 count, childNum, numCharsSaved, bytesSaved = 0, numWilds; 01123 struct sequence* child; 01124 struct parent* parent; 01125 01126 // For each parent 01127 memBlocks_resetCurrent(rsdb_parents); 01128 while ((parent = memBlocks_getCurrent(rsdb_parents)) != NULL) 01129 { 01130 numWilds = 0; 01131 01132 // If it has children 01133 if (parent->children != NULL) 01134 { 01135 // For each child calculate number of characters saved 01136 numCharsSaved = 0; 01137 childNum = 0; 01138 while (childNum < parent->numChildren) 01139 { 01140 child = parent->children[childNum]; 01141 01142 // CHECK: no child has two parents 01143 if (child->parent != parent) 01144 { 01145 printf("Error sequence %d has multiple parents\n", child->number); 01146 exit(-1); 01147 } 01148 01149 numCharsSaved += child->length; 01150 childNum++; 01151 } 01152 01153 // Minus cost of parent 01154 bytesSaved += numCharsSaved - parent->length; 01155 01156 if (numCharsSaved < parent->length) 01157 { 01158 printf("Error parent length %d with %d children saves negative bytes", 01159 parent->length, parent->numChildren); 01160 exit(-1); 01161 } 01162 } 01163 } 01164 01165 return bytesSaved; 01166 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int4 rsdb_compareDescriptionLocations | ( | const void * | sequence1, | |
| const void * | sequence2 | |||
| ) |
Definition at line 352 of file rsdb.c.
References sequence::descriptionLocation.
Referenced by rsdb_writeClusters().
00353 { 00354 const struct sequence *a1, *a2; 00355 00356 a1 = (struct sequence*)sequence1; 00357 a2 = (struct sequence*)sequence2; 00358 00359 if (a1->descriptionLocation > a2->descriptionLocation) 00360 { 00361 return -1; 00362 } 00363 else if (a1->descriptionLocation < a2->descriptionLocation) 00364 { 00365 return 1; 00366 } 00367 else 00368 { 00369 return 0; 00370 } 00371 }
Here is the caller graph for this function:

| int4 rsdb_compareListLengths | ( | const void * | list1, | |
| const void * | list2 | |||
| ) |
Definition at line 396 of file rsdb.c.
References finalList::numEntries.
00397 { 00398 const struct finalList *a1, *a2; 00399 00400 a1 = (struct finalList*)list1; 00401 a2 = (struct finalList*)list2; 00402 00403 if (a1->numEntries > a2->numEntries) 00404 { 00405 return -1; 00406 } 00407 else if (a1->numEntries < a2->numEntries) 00408 { 00409 return 1; 00410 } 00411 else 00412 { 00413 return 0; 00414 } 00415 }
| int4 rsdb_compareSequenceLengths | ( | const void * | sequence1, | |
| const void * | sequence2 | |||
| ) |
Definition at line 374 of file rsdb.c.
References sequence::length.
Referenced by main().
00375 { 00376 const struct sequence *a1, *a2; 00377 00378 a1 = (struct sequence*)sequence1; 00379 a2 = (struct sequence*)sequence2; 00380 00381 if (a1->length > a2->length) 00382 { 00383 return -1; 00384 } 00385 else if (a1->length < a2->length) 00386 { 00387 return 1; 00388 } 00389 else 00390 { 00391 return 0; 00392 } 00393 }
Here is the caller graph for this function:

| void rsdb_freePSSM | ( | ) |
| struct wordLocation* rsdb_mergeLists | ( | struct wordLocation * | wordLocations1, | |
| uint4 * | numWordLocations1, | |||
| struct wordLocation * | wordLocations2, | |||
| uint4 * | numWordLocations2, | |||
| int4 | mergeRelativeOffset, | |||
| uint * | lengthNewList | |||
| ) | [read] |
Definition at line 1205 of file rsdb.c.
References parent::children, global_malloc(), sequence::length, parent::length, parent::number, parent::numChildren, sequence::parent, sequence::regionStart, rsdb_numParents, sequence::sequence, and parent::sequence.
Referenced by rsdb_spexClusterSequences().
01206 { 01207 // Make copy of the child 01208 parent->number = rsdb_numParents; rsdb_numParents++; 01209 parent->length = child->length; 01210 parent->sequence = child->sequence; 01211 01212 // Link parent to child, and vice versa 01213 parent->numChildren = 1; 01214 parent->children = (struct sequence**)global_malloc(sizeof(struct sequence*)); 01215 parent->children[0] = child; 01216 child->regionStart = 0; 01217 child->parent = parent; 01218 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void rsdb_printCluster | ( | struct parent * | parent | ) |
Definition at line 1169 of file rsdb.c.
References parent::children, int4, sequence::length, parent::length, sequence::number, parent::number, parent::numChildren, sequence::parent, print_singleSequence(), sequence::regionStart, sequence::sequence, and parent::sequence.
01170 { 01171 int4 count, childNum, numCharsSaved; 01172 struct sequence* child; 01173 01174 printf("### Parent %d with %d children ###\nParent ", parent->number, parent->numChildren); 01175 print_singleSequence(parent->sequence, parent->length); printf("\n"); 01176 01177 // For each child 01178 numCharsSaved = 0; 01179 childNum = 0; 01180 while (childNum < parent->numChildren) 01181 { 01182 child = parent->children[childNum]; 01183 01184 printf("[%7d] ", child->number); 01185 // Print spaces to align child to parent 01186 count = 0; 01187 while (count < child->regionStart) 01188 { 01189 printf(" "); 01190 count++; 01191 } 01192 01193 // Print child 01194 print_singleSequence(child->sequence, child->length); printf("\n"); 01195 // printf("Child Length=%d start=%d\n", child->length, child->regionStart); 01196 01197 numCharsSaved += child->length; 01198 childNum++; 01199 } 01200 printf("%d bytes saved\n", numCharsSaved - parent->length); 01201 printf("\n"); 01202 }
Here is the call graph for this function:

| void rsdb_printList | ( | struct wordLocation * | list, | |
| uint4 | length | |||
| ) |
| void rsdb_printSequence | ( | uint4 | whitespace, | |
| unsigned char * | sequence, | |||
| uint4 | length | |||
| ) |
Definition at line 1094 of file rsdb.c.
References print_singleSequence().
01095 { 01096 // Print whitespace before sequence begins 01097 while (whitespace > 0) 01098 { 01099 printf(" "); 01100 whitespace--; 01101 } 01102 01103 print_singleSequence(sequence, length); printf("\n"); 01104 }
Here is the call graph for this function:

| void rsdb_processList | ( | struct memSingleBlock * | diagonals, | |
| struct wordLocation * | wordLocations, | |||
| uint4 | numWordLocations | |||
| ) |
Definition at line 1013 of file rsdb.c.
References rsdb_updateDiagonalMatches(), wordLocation::sequenceNumber, and uint4.
Referenced by rsdb_spexClusterSequences().
01015 { 01016 uint4 count1, count2; 01017 struct wordLocation location1, location2; 01018 01019 // For each pair of locations 01020 count1 = 0; 01021 while (count1 < numWordLocations) 01022 { 01023 count2 = count1 + 1; 01024 while (count2 < numWordLocations) 01025 { 01026 // location1 < location2 01027 if (wordLocations[count1].sequenceNumber < wordLocations[count2].sequenceNumber) 01028 { 01029 location1 = wordLocations[count1]; 01030 location2 = wordLocations[count2]; 01031 } 01032 else 01033 { 01034 location2 = wordLocations[count1]; 01035 location1 = wordLocations[count2]; 01036 } 01037 01038 // Update accumulators by adding match between two locations 01039 rsdb_updateDiagonalMatches(diagonals, location1, location2); 01040 01041 count2++; 01042 } 01043 count1++; 01044 } 01045 01046 // printf("listLength=%d numNewDiagonalMatches=%d\n", numWordLocations, 01047 // numNewDiagonalMatches); 01048 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void rsdb_processSequenceMatches | ( | struct memSingleBlock * | sequenceMatches, | |
| struct sequence * | sequences | |||
| ) |
| uint4 rsdb_score | ( | struct sequence * | seq1, | |
| struct sequence * | seq2, | |||
| int4 | relativeOffset, | |||
| float | wildscoreAllowed | |||
| ) |
Definition at line 1242 of file rsdb.c.
References identityAlign_score(), int4, sequence::length, rsdb_threshold, sequence::sequence, and uint4.
Referenced by rsdb_spexClusterSequences().
01244 { 01245 uint4 score, targetScore, identity; 01246 unsigned char* sequence1, *sequence2; 01247 int4 length1, length2, shorterLength; 01248 01249 sequence1 = seq1->sequence; 01250 length1 = seq1->length; 01251 sequence2 = seq2->sequence; 01252 length2 = seq2->length; 01253 01254 if (length1 < length2) 01255 shorterLength = length1; 01256 else 01257 shorterLength = length2; 01258 01259 targetScore = (int)((float)rsdb_threshold * (float)shorterLength / 100.0); 01260 score = identityAlign_score(sequence2, length2, sequence1, length1, -relativeOffset, 01261 targetScore); 01262 01263 identity = 100.0 * (float)score / (float)shorterLength; 01264 01265 if (identity < rsdb_threshold) 01266 return 0; 01267 01268 // printf("score/target=%d,%d\n", score, targetScore); 01269 01270 // printf("lengths=%d,%d\n", length1, length2); 01271 // printf("%d:%d\n", (end1 - start1), score); 01272 01273 // Return score 01274 return identity; 01275 }
Here is the call graph for this function:

Here is the caller graph for this function:

| Hashcounter* rsdb_slottedSpex | ( | struct sequence * | sequences, | |
| uint4 | numberOfSequences, | |||
| uint4 | numberOfLetters | |||
| ) |
Definition at line 418 of file rsdb.c.
References decoQantum, decoWordLength, hashcounter_count(), hashcounter_free(), hashcounter_hash, hashcounter_insert(), hashcounter_multiple(), hashcounter_new(), hashcounter_single(), sequence::length, numIterations, sequence::sequence, and uint4.
Referenced by rsdb_spexClusterSequences().
00420 { 00421 Hashcounter *hashCounter1 = NULL, *hashCounter2 = NULL; 00422 uint4 hashCounterSize = 1; 00423 uint4 words, duplicates, iteration = 0, seed, wordLength; 00424 uint4 sinceChange, submatches, lookup; 00425 uint4 sequenceNumber, sequenceSize, offset, suboffset; 00426 unsigned char* sequence, *word; 00427 uint4 time; 00428 uint4 hashValue1, hashValue2, position; 00429 00430 // Calculate size of hashcounter 00431 while (pow(2,hashCounterSize) < numberOfLetters / decoQantum) 00432 { 00433 hashCounterSize++; 00434 } 00435 hashCounter2 = hashcounter_new(hashCounterSize, 0); 00436 00437 wordLength = decoWordLength - ((numIterations - 1) * decoQantum); 00438 00439 while (iteration < numIterations) 00440 { 00441 time = -clock(); 00442 words = 0; duplicates = 0; 00443 sinceChange = 0; 00444 00445 // For each sequence 00446 sequenceNumber = 0; 00447 while (sequenceNumber < numberOfSequences) 00448 { 00449 sequence = sequences[sequenceNumber].sequence; 00450 sequenceSize = sequences[sequenceNumber].length; 00451 00452 // For each sequence longer enough that hasn't been excluded 00453 if (sequenceSize >= wordLength) 00454 { 00455 // For each word 00456 offset = 0; 00457 while (offset < sequenceSize - wordLength) 00458 { 00459 // If word has occured before 00460 word = sequence + offset; 00461 hashcounter_hash(hashCounter2, word, wordLength, hashValue2, position); 00462 lookup = hashcounter_single(hashCounter2, hashValue2); 00463 00464 if (lookup) 00465 sinceChange = 0; 00466 00467 if (lookup == 1) 00468 { 00469 // If it has only occured once, now it has occured twice 00470 hashcounter_insert(hashCounter2, hashValue2); 00471 } 00472 else 00473 { 00474 submatches = 0; 00475 // First iteration 00476 if (hashCounter1 != NULL) 00477 { 00478 suboffset = offset; 00479 while (suboffset <= offset + decoQantum) 00480 { 00481 word = sequence + suboffset; 00482 hashcounter_hash(hashCounter1, word, wordLength - decoQantum, hashValue1, position); 00483 if (hashcounter_multiple(hashCounter1, hashValue1)) 00484 submatches++; 00485 suboffset++; 00486 } 00487 } 00488 if (sinceChange >= decoQantum && (hashCounter1 == NULL || submatches >= 2)) 00489 { 00490 hashcounter_insert(hashCounter2, hashValue2); 00491 sinceChange = 0; 00492 } 00493 } 00494 00495 sinceChange++; 00496 offset++; 00497 } 00498 } 00499 00500 sequenceNumber++; 00501 } 00502 00503 if (hashCounter1) 00504 hashcounter_free(hashCounter1); 00505 hashCounter1 = hashCounter2; 00506 seed = rand(); 00507 00508 if (iteration != numIterations - 1) 00509 hashCounter2 = hashcounter_new(hashCounterSize, 0); 00510 00511 printf("Iteration %d WordLength=%d\n", iteration, wordLength); fflush(stdout); 00512 hashcounter_count(hashCounter1, stdout); fflush(stdout); 00513 // printf("Total malloced=%s\n", global_int4toString(global_totalMalloc)); fflush(stdout); 00514 00515 time += clock(); 00516 printf("Iteration time=%.2f secs\n", (float)time / CLOCKS_PER_SEC); fflush(stdout); 00517 00518 iteration++; 00519 wordLength += decoQantum; 00520 } 00521 00522 return hashCounter1; 00523 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void rsdb_spexClusterSequences | ( | struct sequence * | sequences, | |
| uint4 | numberOfSequences, | |||
| uint4 | numberOfLetters | |||
| ) |
Definition at line 682 of file rsdb.c.
References parent::children, decoWordLength, global_int4toString(), global_malloc(), global_totalMalloc, hashcounter_free(), hashcounter_hash, hashcounter_multiple(), int4, sequence::length, sequenceLists::longList, diagonal::matchSequence, maxListLength, memBlocks_free(), memBlocks_getCurrent(), memBlocks_initialize(), memBlocks_newEntry(), memBlocks_resetCurrent(), memSingleBlock_free(), memSingleBlock_getCurrent(), memSingleBlock_initialize(), memSingleBlock_initializeExisting(), memSingleBlock_resetCurrent(), finalList::numEntries, memSingleBlock::numEntries, numHashtablePasses, sequence::parent, postings_addEntry(), postings_decodeList(), postings_getSortedLists(), postings_initialize(), postings_numLists, postings_print(), sequence::regionStart, diagonal::relativeOffset, rsdb_accumulatorsSize, rsdb_addChild(), rsdb_newParent(), rsdb_parents, rsdb_processList(), rsdb_score(), rsdb_slottedSpex(), rsdb_updateDiagonalMatches(), sequence::sequence, wordLocation::sequenceNumber, uint4, and wildcardsThreshold.
Referenced by main().
00684 { 00685 Hashcounter *hashCounter; 00686 uint4 seed, wordLength, position, hashValue; 00687 uint4 sequenceNumber, sequenceSize, offset; 00688 unsigned char* sequence, *word; 00689 uint4 hashtablePass = 0, passhash; 00690 int4 key, numWordLocations; 00691 struct wordLocation* wordLocations, *longListWordLocations; 00692 int4 relativeOffset; 00693 uint4 sequence1, sequence2, score, location1, location2; 00694 struct memSingleBlock* diagonals, *sequenceDiagonals; 00695 struct diagonal* diagonalMatch; 00696 struct sequenceMatch *sequenceMatch; 00697 struct memSingleBlock* sequenceMatches; 00698 struct sequence* seq1, *seq2; 00699 uint4 time, numPairs; 00700 struct parent* parent; 00701 struct finalList* finalLists; 00702 struct longList *longList; 00703 struct sequenceLists **sequencesLists, *sequenceLists; 00704 struct memBlocks *blockLongLists, *blockSequenceLists; 00705 uint4 newListLength; 00706 00707 // Perform SPEX algorithm 00708 // hashCounter = rsdb_winnowingSpex(sequences, numberOfSequences, numberOfLetters); 00709 hashCounter = rsdb_slottedSpex(sequences, numberOfSequences, numberOfLetters); 00710 00711 // Memory to store long lists 00712 blockLongLists = memBlocks_initialize(sizeof(struct longList), 10000); 00713 blockSequenceLists = memBlocks_initialize(sizeof(struct sequenceLists), 10000); 00714 00715 // Temporary buffer for storing word locations 00716 wordLocations = (struct wordLocation*)global_malloc(sizeof(struct wordLocation) * numberOfSequences); 00717 00718 // Initialize struct to hold long lists 00719 sequencesLists = (struct sequenceLists**)global_malloc(sizeof(struct sequenceLists*) * numberOfSequences); 00720 00721 // Initialize diagonals 00722 diagonals = (struct memSingleBlock*)global_malloc(sizeof(struct memSingleBlock) 00723 * numberOfSequences); 00724 sequenceNumber = 0; 00725 while (sequenceNumber < numberOfSequences) 00726 { 00727 memSingleBlock_initializeExisting(diagonals + sequenceNumber, sizeof(struct diagonal), 1); 00728 sequencesLists[sequenceNumber] = NULL; 00729 sequenceNumber++; 00730 } 00731 00732 printf("Initialized diagonals\n"); fflush(stdout); 00733 00734 wordLength = decoWordLength; 00735 seed = rand(); 00736 00737 printf("Constructing and processing postings lists"); fflush(stdout); 00738 // Process collection a section at a time 00739 while (hashtablePass < numHashtablePasses) 00740 { 00741 time = -clock(); 00742 postings_initialize(); 00743 00744 // For each sequence 00745 sequenceNumber = 0; 00746 while (sequenceNumber < numberOfSequences) 00747 { 00748 sequence = sequences[sequenceNumber].sequence; 00749 sequenceSize = sequences[sequenceNumber].length; 00750 00751 // For each sequence long enough that hasn't been excluded 00752 if (sequenceSize >= wordLength) 00753 { 00754 // Create initial passhash 00755 passhash = 0; 00756 offset = 0; 00757 while (offset < wordLength) 00758 { 00759 passhash += sequence[offset]; 00760 offset++; 00761 } 00762 00763 // For each word 00764 offset = 0; 00765 while (offset < sequenceSize - wordLength) 00766 { 00767 // If to be processed this pass 00768 if (passhash % numHashtablePasses == hashtablePass) 00769 { 00770 // Insert into hash table 00771 word = sequence + offset; 00772 hashcounter_hash(hashCounter, word, wordLength, hashValue, position); 00773 if (hashcounter_multiple(hashCounter, hashValue)) 00774 { 00775 postings_addEntry(sequence + offset, wordLength, sequenceNumber, offset); 00776 } 00777 } 00778 00779 // Update passhash 00780 passhash -= sequence[offset]; 00781 passhash += sequence[offset + wordLength]; 00782 offset++; 00783 } 00784 } 00785 00786 sequenceNumber++; 00787 } 00788 00789 // Report the load on the hashtable 00790 printf("Hashtable pass %d\n", hashtablePass); 00791 postings_print(); 00792 printf("Total malloced=%s\n", global_int4toString(global_totalMalloc)); 00793 time += clock(); 00794 printf("Hashtable construction time=%f\n", (float)time / CLOCKS_PER_SEC); fflush(stdout); 00795 time = -clock(); 00796 00797 // Sort hash table entries in order of size 00798 finalLists = postings_getSortedLists(); 00799 00800 // Start with the longest list 00801 key = postings_numLists - 1; 00802 while (key >= 0) 00803 { 00804 // If list is too long to process 00805 if (finalLists[key].numEntries > maxListLength) 00806 { 00807 // Decode the long list 00808 longListWordLocations = (struct wordLocation*)global_malloc( 00809 sizeof(struct wordLocation) * finalLists[key].numEntries); 00810 numWordLocations = postings_decodeList(finalLists[key], longListWordLocations); 00811 00812 longList = (struct longList*)memBlocks_newEntry(blockLongLists); 00813 longList->list = longListWordLocations; 00814 longList->length = numWordLocations; 00815 00816 rsdb_accumulatorsSize += sizeof(struct longList); 00817 rsdb_accumulatorsSize += sizeof(struct wordLocation) * finalLists[key].numEntries; 00818 00819 // For each sequence in the list 00820 while (numWordLocations > 0) 00821 { 00822 // Add a reference to this long list 00823 numWordLocations--; 00824 sequenceLists = (struct sequenceLists*)memBlocks_newEntry(blockSequenceLists); 00825 sequenceLists->next = sequencesLists[longListWordLocations[numWordLocations].sequenceNumber]; 00826 sequenceLists->longList = longList; 00827 sequencesLists[longListWordLocations[numWordLocations].sequenceNumber] = sequenceLists; 00828 00829 rsdb_accumulatorsSize += sizeof(struct sequenceLists); 00830 } 00831 } 00832 else 00833 { 00834 // Get each list 00835 numWordLocations = postings_decodeList(finalLists[key], wordLocations); 00836 00837 // Process a list of word locations and insert/update diagonal entries 00838 rsdb_processList(diagonals, wordLocations, numWordLocations); 00839 } 00840 00841 key--; 00842 } 00843 00844 time += clock(); 00845 printf("Hashtable processing time=%f\n", (float)time / CLOCKS_PER_SEC); fflush(stdout); 00846 printf("."); fflush(stdout); 00847 00848 printf("Accumulators size=%.1f Mb\n", (float)rsdb_accumulatorsSize / 1024.0 / 1024.0); 00849 00850 free(finalLists); 00851 00852 hashtablePass++; 00853 } 00854 00855 printf("done.\n"); fflush(stdout); 00856 00857 // Free hash counter 00858 hashcounter_free(hashCounter); 00859 00860 free(wordLocations); 00861 00862 printf("Identifying high-scoring sequence pairs / hierarchical clustering..."); fflush(stdout); 00863 time = -clock(); 00864 00865 sequenceMatches = memSingleBlock_initialize(sizeof(struct sequenceMatch), numberOfSequences); 00866 00867 // For each sequence 00868 sequenceNumber = 0; 00869 numPairs = 0; 00870 while (sequenceNumber < numberOfSequences) 00871 { 00872 sequence1 = sequenceNumber; 00873 seq1 = sequences + sequence1; 00874 00875 // Don't process sequences that already have parents 00876 if (seq1->parent == NULL) 00877 { 00878 // For each long list this sequence appears in 00879 sequenceLists = sequencesLists[sequenceNumber]; 00880 while (sequenceLists != NULL) 00881 { 00882 longList = sequenceLists->longList; 00883 00884 // Get the long list 00885 wordLocations = longList->list; 00886 numWordLocations = longList->length; 00887 00888 // printf("Seq %d listLength %d\n", sequenceNumber, numWordLocations); 00889 00890 // Find entry for sequence 1 00891 location1 = 0; 00892 while (location1 < numWordLocations) 00893 { 00894 if (wordLocations[location1].sequenceNumber == sequence1) 00895 break; 00896 00897 location1++; 00898 } 00899 00900 // For every entry after that 00901 location2 = location1 + 1; 00902 newListLength = location2; 00903 while (location2 < numWordLocations) 00904 { 00905 if (sequences[wordLocations[location2].sequenceNumber].parent == NULL) 00906 { 00907 // Update accumulators 00908 rsdb_updateDiagonalMatches(diagonals, wordLocations[location1], 00909 wordLocations[location2]); 00910 00911 // Remove clustered sequences from the list 00912 wordLocations[newListLength] = wordLocations[location2]; 00913 00914 newListLength++; 00915 } 00916 00917 location2++; 00918 } 00919 00920 // printf("Length Now %d\n", newListLength); 00921 longList->length = newListLength; 00922 00923 sequenceLists = sequenceLists->next; 00924 } 00925 00926 // Go through list of matching diagonals 00927 sequenceDiagonals = diagonals + sequenceNumber; 00928 memSingleBlock_resetCurrent(sequenceDiagonals); 00929 while ((diagonalMatch = memSingleBlock_getCurrent(sequenceDiagonals)) != NULL) 00930 { 00931 sequence2 = diagonalMatch->matchSequence; 00932 relativeOffset = diagonalMatch->relativeOffset; 00933 seq2 = sequences + sequence2; 00934 00935 /* 00936 if (minimum(seq1->length, seq2->length) > 10) 00937 { 00938 // Calculate rough score based on number of matches, length of overlap region 00939 score = 100 * diagonalMatch->numMatches * decoWordLength / 00940 minimum(seq1->length, seq2->length); 00941 00942 // score = rsdb_score(seq1, seq2, relativeOffset, wildcardsThreshold); 00943 // if (score > 0) 00944 printf("%d,%d matches=%d w=%d q=%d i=%d seqLength=%d\n", 00945 seq1->number, seq2->number, diagonalMatch->numMatches, decoWordLength, 00946 decoQantum, numIterations, minimum(seq1->length, seq2->length), score); 00947 } 00948 */ 00949 00950 // if (score >= percentMatchesRequired) 00951 { 00952 numPairs++; 00953 00954 // Case 1 - neither sequence has a parent 00955 if (seq1->parent == NULL && seq2->parent == NULL) 00956 { 00957 // Compare the two sequences in detail 00958 score = rsdb_score(seq1, seq2, relativeOffset, wildcardsThreshold); 00959 00960 if (score > 0) 00961 { 00962 // printf("New cluster\n"); fflush(stdout); 00963 parent = (struct parent*)memBlocks_newEntry(rsdb_parents); 00964 rsdb_newParent(parent, seq1); 00965 rsdb_addChild(parent, seq2, relativeOffset); 00966 } 00967 } 00968 // Case 2 - sequence1 has a parent, sequence2 does not 00969 else if (seq1->parent != NULL && seq2->parent == NULL) 00970 { 00971 // printf("Add to %d\n", seq1->parent->numChildren); fflush(stdout); 00972 00973 // Check score for aligning sequence to parent 00974 parent = seq1->parent; 00975 relativeOffset += seq1->regionStart; 00976 00977 score = rsdb_score(parent->children[0], seq2, relativeOffset, wildcardsThreshold); 00978 00979 if (score > 0) 00980 { 00981 rsdb_addChild(seq1->parent, seq2, relativeOffset); 00982 } 00983 } 00984 } 00985 } 00986 } 00987 00988 free(diagonals[sequenceNumber].block); 00989 00990 sequenceNumber++; 00991 } 00992 00993 printf("done. (%d pairs)\n", numPairs); fflush(stdout); 00994 time += clock(); 00995 printf("Total time=%f\n", (float)time / CLOCKS_PER_SEC); fflush(stdout); 00996 00997 // Free long lists 00998 memBlocks_resetCurrent(blockLongLists); 00999 while ((longList = memBlocks_getCurrent(blockLongLists)) != NULL) 01000 { 01001 free(longList->list); 01002 } 01003 memBlocks_free(blockSequenceLists); 01004 memBlocks_free(blockLongLists); 01005 01006 free(sequencesLists); 01007 free(diagonals); 01008 01009 memSingleBlock_free(sequenceMatches); 01010 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void rsdb_updateDiagonalMatches | ( | struct memSingleBlock * | diagonals, | |
| struct wordLocation | location1, | |||
| struct wordLocation | location2 | |||
| ) |
Definition at line 1051 of file rsdb.c.
References int4, diagonal::matchSequence, memSingleBlock_getCurrent(), memSingleBlock_newEntry(), memSingleBlock_resetCurrent(), wordLocation::offset, diagonal::relativeOffset, rsdb_accumulatorsSize, and wordLocation::sequenceNumber.
Referenced by rsdb_processList(), and rsdb_spexClusterSequences().
01053 { 01054 struct memSingleBlock *sequenceDiagonals; 01055 struct diagonal* diagonalMatch; 01056 int4 relativeOffset; 01057 char matchFound; 01058 01059 // If the locations don't both refer to the same sequence 01060 if (location1.sequenceNumber != location2.sequenceNumber) 01061 { 01062 // Search list of matching diagonals for this sequence for one that is the 01063 // same diagonal and matching sequence as the new match 01064 relativeOffset = location1.offset - location2.offset; 01065 matchFound = 0; 01066 sequenceDiagonals = diagonals + location1.sequenceNumber; 01067 memSingleBlock_resetCurrent(sequenceDiagonals); 01068 while ((diagonalMatch = memSingleBlock_getCurrent(sequenceDiagonals)) != NULL) 01069 { 01070 if (diagonalMatch->relativeOffset == relativeOffset && 01071 diagonalMatch->matchSequence == location2.sequenceNumber) 01072 { 01073 // If we found an identinal entry, increment number of matches 01074 matchFound = 1; 01075 // diagonalMatch->numMatches++; 01076 break; 01077 } 01078 } 01079 01080 // Otherwise create a new entry, ONLY if neither sequence1 or 2 belong to a cluster 01081 if (!matchFound)// && sequences[wordLocations[count1].sequenceNumber].parent == NULL 01082 // && sequences[wordLocations[count2].sequenceNumber].parent == NULL) 01083 { 01084 diagonalMatch = memSingleBlock_newEntry(sequenceDiagonals); 01085 diagonalMatch->relativeOffset = relativeOffset; 01086 diagonalMatch->matchSequence = location2.sequenceNumber; 01087 // diagonalMatch->numMatches = 1; 01088 01089 rsdb_accumulatorsSize += sizeof(struct diagonal); 01090 } 01091 } 01092 }
Here is the call graph for this function:

Here is the caller graph for this function:

| Hashcounter* rsdb_winnowingSpex | ( | struct sequence * | sequences, | |
| uint4 | numberOfSequences, | |||
| uint4 | numberOfLetters | |||
| ) |
Definition at line 526 of file rsdb.c.
References decoQantum, decoWordLength, hashcounter_count(), hashcounter_free(), hashcounter_hash, hashcounter_insert(), hashcounter_multiple(), hashcounter_new(), sequence::length, numIterations, sequence::sequence, and uint4.
00528 { 00529 Hashcounter *hashCounter1 = NULL, *hashCounter2 = NULL; 00530 uint4 hashCounterSize = 1; 00531 uint4 words, duplicates, iteration = 0, seed, wordLength; 00532 uint4 sinceChange, submatches, lookup; 00533 uint4 sequenceNumber, sequenceSize, offset, suboffset; 00534 unsigned char* sequence, *word; 00535 uint4 time; 00536 uint4 hashValue1, hashValue2, position, smallestPosition; 00537 uint4 windowHashes[decoQantum], inserted[decoQantum]; 00538 00539 // int debug = 0; 00540 00541 // Calculate size of hashcounter 00542 while (pow(2,hashCounterSize) < numberOfLetters / decoQantum) 00543 { 00544 hashCounterSize++; 00545 } 00546 hashCounter2 = hashcounter_new(hashCounterSize, 0); 00547 00548 wordLength = decoWordLength - ((numIterations - 1) * decoQantum); 00549 00550 while (iteration < numIterations) 00551 { 00552 time = -clock(); 00553 words = 0; duplicates = 0; 00554 sinceChange = 0; 00555 00556 // if (debug) 00557 // printf("Iteration %d wordLength %d\n", iteration, wordLength); 00558 00559 // For each sequence 00560 sequenceNumber = 0; 00561 while (sequenceNumber < numberOfSequences) 00562 { 00563 sequence = sequences[sequenceNumber].sequence; 00564 sequenceSize = sequences[sequenceNumber].length; 00565 00566 // For each sequence longer enough that hasn't been excluded 00567 if (sequenceSize >= wordLength) 00568 { 00569 // For each word 00570 offset = 0; 00571 00572 // For first few words 00573 while (offset < decoQantum) 00574 { 00575 // Just add hash to the window 00576 word = sequence + offset; 00577 hashcounter_hash(hashCounter2, word, wordLength, hashValue2, position); 00578 windowHashes[offset] = hashValue2; 00579 inserted[offset] = 0; 00580 00581 // if (debug) 00582 // printf("New hash [%d] %u\n", offset, hashValue2); 00583 00584 offset++; 00585 } 00586 00587 // For remaining words 00588 while (offset < sequenceSize - wordLength) 00589 { 00590 // If not the first iteration 00591 submatches = 0; 00592 if (hashCounter1 != NULL) 00593 { 00594 // Count number of sub-chunks that appear more than once 00595 suboffset = offset; 00596 while (suboffset <= offset + decoQantum) 00597 { 00598 word = sequence + suboffset; 00599 hashcounter_hash(hashCounter1, word, wordLength - decoQantum, hashValue1, position); 00600 if (hashcounter_multiple(hashCounter1, hashValue1)) 00601 submatches++; 00602 suboffset++; 00603 } 00604 } 00605 00606 // If this word may appear more than once in the collection 00607 if (hashCounter1 == NULL || submatches >= 2) 00608 { 00609 // Hash the word and add to window 00610 word = sequence + offset; 00611 hashcounter_hash(hashCounter2, word, wordLength, hashValue2, position); 00612 windowHashes[offset % decoQantum] = hashValue2; 00613 inserted[offset % decoQantum] = 0; 00614 00615 // if (debug) 00616 // printf("New hash [%d] %u (submatches=%d) winPos=%d\n", offset, hashValue2, submatches, offset % decoQantum); 00617 00618 // Search window for smallest hash value 00619 position = 0; smallestPosition = 0; 00620 while (position < decoQantum) 00621 { 00622 // if (debug) 00623 // printf("Old hash %u [%d/%d]\n", windowHashes[position], position, decoQantum); 00624 00625 if (windowHashes[position] < windowHashes[smallestPosition]) 00626 smallestPosition = position; 00627 00628 position++; 00629 } 00630 00631 // If no smaller value, insert word into hash counter 00632 if (!inserted[smallestPosition]) 00633 { 00634 inserted[smallestPosition] = 1; 00635 // if (debug) 00636 // { 00637 // printf("Inserted %d! ", windowHashes[smallestPosition]); 00638 // print_singleSequence(sequence + offset, wordLength); 00639 // printf("\n"); 00640 // } 00641 hashcounter_insert(hashCounter2, hashValue2); 00642 } 00643 } 00644 else 00645 { 00646 // if (debug) 00647 // printf("Null hash\n"); 00648 // Otherwise insert placeholder into window 00649 windowHashes[offset % decoQantum] = hashCounterSize; 00650 } 00651 00652 offset++; 00653 } 00654 } 00655 00656 sequenceNumber++; 00657 } 00658 00659 if (hashCounter1) 00660 hashcounter_free(hashCounter1); 00661 hashCounter1 = hashCounter2; 00662 seed = rand(); 00663 00664 if (iteration != numIterations - 1) 00665 hashCounter2 = hashcounter_new(hashCounterSize, 0); 00666 00667 printf("Iteration %d WordLength=%d\n", iteration, wordLength); fflush(stdout); 00668 hashcounter_count(hashCounter1, stdout); fflush(stdout); 00669 // printf("Total malloced=%s\n", global_int4toString(global_totalMalloc)); fflush(stdout); 00670 00671 time += clock(); 00672 printf("Iteration time=%.2f secs\n", (float)time / CLOCKS_PER_SEC); fflush(stdout); 00673 00674 iteration++; 00675 wordLength += decoQantum; 00676 } 00677 00678 return hashCounter1; 00679 }
Here is the call graph for this function:

| void rsdb_writeClusters | ( | char * | filename, | |
| struct sequence * | sequences | |||
| ) |
Definition at line 261 of file rsdb.c.
References parent::children, sequence::descriptionLength, sequence::descriptionLocation, descriptions_getDescription(), global_malloc(), sequence::length, memBlocks_getCurrent(), memBlocks_resetCurrent(), parent::numChildren, sequence::parent, readdb_dbAlphabetType, readdb_numberOfSequences, rsdb_compareDescriptionLocations(), rsdb_parents, sequence::sequence, parent::sequence, uint4, writedb_addSequence(), writedb_close(), writedb_initialize(), and writedb_numberOfLetters.
Referenced by main().
00262 { 00263 struct parent* parent; 00264 char *newFilename, *wildcardsOutFilename, *renamedFilename; 00265 uint4 count, sequenceNumber, sequenceLength, descriptionLength, descriptionLocation; 00266 unsigned char *sequence, *description; 00267 uint4 childNum, numChildren, descriptionTotalLength, numWilds; 00268 struct child* children; 00269 struct wild* wilds, *wildcards; 00270 00271 // Construct sequence and description filenames 00272 newFilename = (char*)global_malloc(strlen(filename) + 12); 00273 sprintf(newFilename, "%s_rsdb", filename); 00274 00275 // Initialize writing to formatted database 00276 writedb_initialize(newFilename, readdb_dbAlphabetType); 00277 00278 printf("Writing representative sequences to disk..."); fflush(stdout); 00279 00280 // Sort sequences back to original order 00281 qsort(sequences, readdb_numberOfSequences, sizeof(struct sequence), 00282 rsdb_compareDescriptionLocations); 00283 00284 // For each sequence without a parent or only child 00285 sequenceNumber = 0; 00286 while (sequenceNumber < readdb_numberOfSequences) 00287 { 00288 if (sequences[sequenceNumber].parent == NULL || 00289 sequences[sequenceNumber].parent->numChildren == 1 || 00290 sequences[sequenceNumber].parent->sequence == sequences[sequenceNumber].sequence) 00291 { 00292 sequenceLength = sequences[sequenceNumber].length; 00293 if (sequenceLength > 10) 00294 { 00295 sequence = sequences[sequenceNumber].sequence; 00296 descriptionLength = sequences[sequenceNumber].descriptionLength; 00297 descriptionLocation = sequences[sequenceNumber].descriptionLocation; 00298 00299 // Load description from old collection 00300 // description = sequences[sequenceNumber].description; 00301 description = descriptions_getDescription(descriptionLocation, descriptionLength); 00302 00303 // Add sequence to the formatted collection 00304 writedb_addSequence(sequence, sequenceLength, description, 00305 descriptionLength, NULL, 0, NULL, 0); 00306 00307 free(description); 00308 } 00309 } 00310 00311 // Free the sequence 00312 // free(sequences[sequenceNumber].sequence); 00313 00314 sequenceNumber++; 00315 } 00316 00317 // For each parent 00318 memBlocks_resetCurrent(rsdb_parents); 00319 while ((parent = memBlocks_getCurrent(rsdb_parents)) != NULL) 00320 { 00321 // Free the parent 00322 free(parent->children); 00323 } 00324 00325 printf("done.\n"); fflush(stdout); 00326 00327 printf("%llu letters written\n", writedb_numberOfLetters); 00328 00329 // Finalize writing to the new formatted collection 00330 writedb_close(); 00331 00332 /* // Rename clustered collection 00333 renamedFilename = (char*)global_malloc(strlen(filename) + 20); 00334 sprintf(renamedFilename, "%s.wildcards", filename); 00335 if (rename(wildcardsOutFilename, renamedFilename)) 00336 fprintf(stderr, "Error renaming file %s to %s\n", wildcardsOutFilename, renamedFilename); 00337 00338 sprintf(renamedFilename, "%s.sequences", filename); 00339 if (rename(writedb_sequenceFilename, renamedFilename)) 00340 fprintf(stderr, "Error renaming file %s to %s\n", writedb_sequenceFilename, renamedFilename); 00341 00342 sprintf(renamedFilename, "%s.descriptions", filename); 00343 if (rename(writedb_descriptionsFilename, renamedFilename)) 00344 fprintf(stderr, "Error renaming file %s to %s\n", writedb_descriptionsFilename, renamedFilename); 00345 00346 sprintf(renamedFilename, "%s.data", filename); 00347 if (rename(writedb_dataFilename, renamedFilename)) 00348 fprintf(stderr, "Error renaming file %s to %s\n", writedb_dataFilename, renamedFilename);*/ 00349 }
Here is the call graph for this function:

Here is the caller graph for this function:

| uint4 decoQantum = 9 |
| uint4 decoWordLength = 30 |
| uint4 maxListLength = 20 |
| uint4 numIterations = 3 |
| uint4 rsdb_accumulatorsSize = 0 |
Definition at line 83 of file rsdb.c.
Referenced by rsdb_spexClusterSequences(), and rsdb_updateDiagonalMatches().
| float rsdb_averageMatchScore = 0 |
| float* rsdb_averageWildcodeScores = NULL |
| uint4 rsdb_numMergeDiagonals |
| uint4 rsdb_numParents = 0 |
| struct memBlocks* rsdb_parents |
Definition at line 79 of file rsdb.c.
Referenced by main(), rsdb_calculateSavings(), rsdb_spexClusterSequences(), and rsdb_writeClusters().
| int4 rsdb_PSSMend |
| int4 rsdb_PSSMlength |
| int4 rsdb_PSSMstart |
| float wildcardsThreshold = 0.25 |
1.5.2