#include "blast.h"Include dependency graph for chooseWilds.c:

Go to the source code of this file.
Functions | |
| void | chooseWilds_printOccurenceMatrix (struct wild *wilds, uint4 numWilds) |
| int4 | main (int4 argc, char *argv[]) |
| void chooseWilds_printOccurenceMatrix | ( | struct wild * | wilds, | |
| uint4 | numWilds | |||
| ) |
Definition at line 212 of file chooseWilds.c.
References wild::code, wild::count, encoding_getLetter(), encoding_numLetters, getbit, and uint4.
Referenced by main().
00213 { 00214 uint4 count, aa1, aa2, code; 00215 uint4 counts[encoding_numLetters][encoding_numLetters]; 00216 00217 aa1 = 0; 00218 while (aa1 < encoding_numLetters) 00219 { 00220 aa2 = 0; 00221 while (aa2 < encoding_numLetters) 00222 { 00223 counts[aa1][aa2] = 0; 00224 aa2++; 00225 } 00226 aa1++; 00227 } 00228 00229 count = 0; 00230 while (count < numWilds) 00231 { 00232 code = wilds[count].code; 00233 00234 aa1 = 0; 00235 while (aa1 < encoding_numLetters) 00236 { 00237 aa2 = 0; 00238 while (aa2 < encoding_numLetters) 00239 { 00240 if (getbit(code, aa1) && getbit(code, aa2)) 00241 { 00242 counts[aa1][aa2] += wilds[count].count; 00243 } 00244 aa2++; 00245 } 00246 aa1++; 00247 } 00248 00249 // printf("%d: ", wilds[count].count); 00250 // wildcards_printWildcard(wilds[count].code); 00251 count++; 00252 } 00253 00254 aa1 = 0; 00255 while (aa1 < encoding_numLetters) 00256 { 00257 printf("%c ", encoding_getLetter(aa1)); 00258 00259 aa2 = 0; 00260 while (aa2 < aa1) 00261 { 00262 printf("%6d ", counts[aa1][aa2]); 00263 aa2++; 00264 } 00265 printf("\n"); 00266 aa1++; 00267 } 00268 00269 printf(" "); 00270 aa2 = 0; 00271 while (aa2 < encoding_numLetters) 00272 { 00273 printf(" %c ", encoding_getLetter(aa2)); 00274 aa2++; 00275 } 00276 printf("\n"); 00277 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int4 main | ( | int4 | argc, | |
| char * | argv[] | |||
| ) |
Definition at line 13 of file chooseWilds.c.
References chooseWilds_printOccurenceMatrix(), wild::code, wild::count, encoding_alphabetType, encoding_alphabetTypes, encoding_initialize(), encoding_numLetters, encoding_protein, global_malloc(), parameters_findScoringMatrix(), parameters_scoringMatrixPath, readdb_dbAlphabetType, readdb_getChildren(), readdb_longestSequenceLength, readdb_nextVolume(), readdb_numberOfClusters, readdb_numberOfLetters, readdb_numberOfSequences, readdb_numberOfVolumes, readdb_open(), readdb_readSequence(), scoreMatrix_load(), setbit, uint4, wildcards_averageResidueWildMatch(), wildcards_countOccurences(), wildcards_getOccurences(), wildcards_getSubset(), wildcards_initializeCountOccurences(), wildcards_numClusterWildcards, wildcards_outputWildcards(), wildcards_printWildcard(), wildcards_scoreCandidates(), wildcards_scoreMatrix, and wildcards_scoringConstant.
00014 { 00015 unsigned char *filename, *readdb_address, *sequence, code, *wildcardsFilename; 00016 uint4 descriptionStart = 0, descriptionLength = 0, sequenceLength; 00017 uint4 encodedLength, numChildren, count; 00018 char *description; 00019 struct child* children, *child; 00020 uint4 candidateNum, change, childNum; 00021 uint4 numWilds = 0; 00022 struct wild *wilds, defaultWild, *candidates, bestNewCandidate; 00023 struct wild *wildSubset, *newCandidates, *bestNewCandidates; 00024 uint4 sizeWildSubset, numOccurences, numCandidates; 00025 float defaultWildscore, candidatesScore, bestScore; 00026 00027 // User must provide FASTA format file at command line 00028 if (argc < 4) 00029 { 00030 fprintf(stderr, "Useage: chooseWilds <database> <Wildcard score constant> <Wildcards output file>\n"); 00031 exit(-1); 00032 } 00033 filename = argv[1]; 00034 wildcards_scoringConstant = atof(argv[2]); 00035 wildcardsFilename = argv[3]; 00036 00037 readdb_open(filename); 00038 00039 printf("Number of clusters = %u\n", readdb_numberOfClusters); 00040 printf("Number of sequences = %u\n", readdb_numberOfSequences); 00041 printf("Number of volumes = %u\n", readdb_numberOfVolumes); 00042 printf("Total number of letters = %llu\n", readdb_numberOfLetters); 00043 printf("Length of longest sequence = %u\n", readdb_longestSequenceLength); 00044 printf("Alphabet type = %s\n", encoding_alphabetTypes[readdb_dbAlphabetType]); 00045 00046 // Initialize codes array 00047 encoding_initialize(readdb_dbAlphabetType); 00048 00049 // Load score matrix 00050 parameters_findScoringMatrix(); 00051 wildcards_scoreMatrix = scoreMatrix_load(parameters_scoringMatrixPath); 00052 00053 // Count occurences of each wildcard set 00054 wildcards_initializeCountOccurences(readdb_longestSequenceLength); 00055 do 00056 { 00057 // Read each sequence in the collection 00058 while (readdb_readSequence(&sequence, &sequenceLength, &descriptionStart, 00059 &descriptionLength, &encodedLength)) 00060 { 00061 // If a protein sequence cluster 00062 if (encoding_alphabetType == encoding_protein && sequenceLength + 2 != encodedLength) 00063 { 00064 // Get the children 00065 children = readdb_getChildren(sequence, sequenceLength, encodedLength, 00066 descriptionStart, &numChildren); 00067 00068 // Add to list of occurences 00069 wildcards_countOccurences(children, numChildren, sequenceLength); 00070 00071 childNum = 0; 00072 while (childNum < numChildren) 00073 { 00074 free(children[childNum].edits); 00075 free(children[childNum].sequence - 1); 00076 childNum++; 00077 } 00078 00079 free(children); 00080 } 00081 } 00082 } 00083 while (readdb_nextVolume()); 00084 00085 // Get final list of number of occurences of each wild 00086 wilds = wildcards_getOccurences(&numWilds); 00087 00088 chooseWilds_printOccurenceMatrix(wilds, numWilds); 00089 00090 // Build default wildcard 00091 defaultWild.code = 0; 00092 defaultWild.count = 0; 00093 code = 0; 00094 while (code < encoding_numLetters) 00095 { 00096 setbit(defaultWild.code, code); 00097 code++; 00098 } 00099 00100 // Get average score for default wildcard 00101 wildSubset = wildcards_getSubset(defaultWild, wilds, numWilds, &sizeWildSubset, &numOccurences); 00102 defaultWildscore = wildcards_averageResidueWildMatch(defaultWild, wildSubset, sizeWildSubset); 00103 printf("defaultWildScore=%f occurences=%d\n", defaultWildscore, numOccurences); 00104 00105 // Build up list of wildcard candidates 00106 candidates = (struct wild*)global_malloc(sizeof(struct wild) * wildcards_numClusterWildcards); 00107 numCandidates = 0; 00108 while (numCandidates < wildcards_numClusterWildcards - 1) 00109 { 00110 // Explore each possible option to add to list of candidates 00111 count = 0; 00112 bestScore = 0; 00113 while (count < numWilds) 00114 { 00115 // printf("set pos %d to ", numCandidates); 00116 // wildcards_printWildcard(wilds[count].code); 00117 candidates[numCandidates] = wilds[count]; 00118 00119 // Score a set of candidates 00120 candidatesScore = wildcards_scoreCandidates(candidates, numCandidates + 1, 00121 wilds, numWilds, defaultWildscore); 00122 // printf("Candidates saving=%f\n", candidatesScore); 00123 if (candidatesScore > bestScore) 00124 { 00125 bestScore = candidatesScore; 00126 bestNewCandidate = wilds[count]; 00127 } 00128 00129 count++; 00130 } 00131 00132 printf("Score=%f Best new candidate (%d): ", bestScore, numCandidates); 00133 wildcards_printWildcard(bestNewCandidate.code); 00134 candidates[numCandidates] = bestNewCandidate; 00135 00136 numCandidates++; 00137 } 00138 00139 newCandidates = (struct wild*)global_malloc(sizeof(struct wild) * wildcards_numClusterWildcards); 00140 bestNewCandidates = (struct wild*)global_malloc(sizeof(struct wild) * wildcards_numClusterWildcards); 00141 00142 // Perform hill climbing; consider changing each position 00143 change = 1; 00144 while (change) 00145 { 00146 change = 0; 00147 candidateNum = 0; 00148 bestScore = 0; 00149 while (candidateNum < numCandidates) 00150 { 00151 // Start with current candidates 00152 memcpy(newCandidates, candidates, sizeof(struct wild) * wildcards_numClusterWildcards - 1); 00153 00154 // Change current position to every possible candidate 00155 count = 0; 00156 while (count < numWilds) 00157 { 00158 newCandidates[candidateNum] = wilds[count]; 00159 00160 // Score a possible new set of candidates 00161 candidatesScore = wildcards_scoreCandidates(newCandidates, numCandidates, 00162 wilds, numWilds, defaultWildscore); 00163 00164 // Check if best new candidates 00165 if (candidatesScore > bestScore) 00166 { 00167 bestScore = candidatesScore; 00168 memcpy(bestNewCandidates, newCandidates, sizeof(struct wild) * 00169 wildcards_numClusterWildcards - 1); 00170 } 00171 00172 count++; 00173 } 00174 00175 candidateNum++; 00176 } 00177 00178 // Update candidates 00179 if (bestScore > wildcards_scoreCandidates(candidates, numCandidates, 00180 wilds, numWilds, defaultWildscore)) 00181 { 00182 printf("New bestScore=%f\n", bestScore); 00183 memcpy(candidates, bestNewCandidates, sizeof(struct wild) * 00184 wildcards_numClusterWildcards - 1); 00185 change = 1; 00186 } 00187 00188 candidateNum = 0; 00189 while (candidateNum < numCandidates) 00190 { 00191 wildcards_printWildcard(candidates[candidateNum].code); 00192 candidateNum++; 00193 } 00194 } 00195 00196 // Print out final set of clusters with default wild added 00197 candidates[numCandidates] = defaultWild; numCandidates++; 00198 wildcards_scoreCandidates(candidates, numCandidates, wilds, numWilds, defaultWildscore); 00199 wildcards_outputWildcards(wildcardsFilename); 00200 00201 printf("%d sequences read.\n", readdb_numberOfSequences); 00202 fflush(stdout); 00203 00204 free(candidates); 00205 free(newCandidates); 00206 free(bestNewCandidates); 00207 00208 return 0; 00209 }
Here is the call graph for this function:

1.5.2