include/wildcards.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  clusterWildcard
struct  wild

Defines

#define setbit(i, pos)   (i |= (1 << pos))
#define getbit(i, pos)   (i & (1 << pos))
#define wildcards_numClusterWildcards   7

Functions

void wildcards_initialize ()
void wildcards_printWildcard (uint4 wildcard)
float wildcards_scoreCandidates (struct wild *wildCandidates, uint4 numWildCandidates, struct wild *wilds, uint4 numWilds, float defaultWildscore)
struct wild * wildcards_getSubset (struct wild wildCandidate, struct wild *wilds, uint4 numWilds, uint4 *sizeWildSubset, uint4 *numOccurences)
float wildcards_averageResidueWildMatch (struct wild wildCandidate, struct wild *wilds, uint4 numWilds)
float wildcards_scoreResidueWildMatch (struct wild wildCandidate, struct wild *wilds, uint4 numWilds, uint4 code)
struct wild * wildcards_joinSubset (struct wild *set1, uint4 *size1, struct wild *set2, uint4 size2)
void wildcards_removeSubset (struct wild *set1, uint4 *size1, struct wild *set2, uint4 size2, uint4 *numOccurences)
void wildcards_outputWildcards (char *filename)
void wildcards_readWildcards (char *filename)
void wildcards_initializeCountOccurences (uint4 longestSequenceLength)
void wildcards_countOccurences (struct child *children, uint4 numChildren, uint4 sequenceLength)
struct wild * wildcards_getOccurences (uint4 *numWilds)

Variables

scoreMatrix wildcards_scoreMatrix
int wildcards_printWilds
float wildcards_scoringConstant
int2 wildcards_scoreMatrixRow [encoding_aaStartWildcards]
clusterWildcard wildcards_clusterWildcards [wildcards_numClusterWildcards]


Define Documentation

#define getbit ( i,
pos   )     (i & (1 << pos))

Definition at line 19 of file wildcards.h.

Referenced by chooseWilds_printOccurenceMatrix(), cluster_addChild(), cluster_averageWildcodeScore(), cluster_score(), wildcards_countBits(), wildcards_outputWildcards(), wildcards_printWildcard(), and wildcards_scoreResidueWildMatch().

#define setbit ( i,
pos   )     (i |= (1 << pos))

Definition at line 18 of file wildcards.h.

Referenced by cluster_addChild(), cluster_score(), main(), wildcards_countOccurences(), and wildcards_initialize().

#define wildcards_numClusterWildcards   7

Definition at line 20 of file wildcards.h.

Referenced by cluster_addChild(), cluster_score(), cluster_writeClusters(), main(), scoreMatrix_load(), wildcards_initialize(), wildcards_outputWildcards(), and wildcards_readWildcards().


Function Documentation

float wildcards_averageResidueWildMatch ( struct wild  wildCandidate,
struct wild *  wilds,
uint4  numWilds 
)

Definition at line 267 of file wildcards.c.

References wild::code, encoding_numLetters, Robinson_prob, wildcards_scoreMatrixRow, and wildcards_scoreResidueWildMatch().

Referenced by main(), and wildcards_scoreCandidates().

00269 {
00270         unsigned char code;
00271         float score, averageScore = 0;
00272 
00273     code = 0;
00274     while (code < encoding_numLetters)
00275     {
00276         // Get score for aligning residue 'code' with wildcard
00277         score = wildcards_scoreResidueWildMatch(wildCandidate, wilds, numWilds, code);
00278         averageScore += score * (Robinson_prob[code] / 1000.0);
00279 
00280         wildcards_scoreMatrixRow[code] = ceil(score - 0.5);
00281 
00282         code++;
00283     }
00284 
00285     return averageScore;
00286 }

Here is the call graph for this function:

Here is the caller graph for this function:

void wildcards_countOccurences ( struct child *  children,
uint4  numChildren,
uint4  sequenceLength 
)

Definition at line 361 of file wildcards.c.

References edit::code, wild::count, child::edits, child::numEdits, edit::position, child::regionStart, setbit, uint4, wildcards_numWilds, wildcards_wildCount, and wildcards_wildPositions.

Referenced by cluster_writeClusters(), and main().

00362 {
00363         uint4 childNum, count, position;
00364     struct edit* edits;
00365     struct child* child;
00366     uint4 numEdits;
00367 
00368     // For each child
00369     childNum = 0;
00370     while (childNum < numChildren)
00371     {
00372         child = children + childNum;
00373 
00374         edits = child->edits;
00375         numEdits = child->numEdits;
00376 
00377         // For each edit
00378         count = 0;
00379         while (count < child->numEdits)
00380         {
00381             // Set bit for this code appearing at this position
00382             position = edits->position + child->regionStart;
00383             setbit(wildcards_wildPositions[position], edits->code);
00384 
00385             edits++;
00386             count++;
00387         }
00388 
00389         childNum++;
00390     }
00391 
00392     // Increment count for each wildcard code and clear position counts
00393     position = 0;
00394     while (position < sequenceLength)
00395     {
00396         if (wildcards_wildPositions[position])
00397         {
00398             if (!wildcards_wildCount[wildcards_wildPositions[position]])
00399                 wildcards_numWilds++;
00400             wildcards_wildCount[wildcards_wildPositions[position]]++;
00401 
00402             wildcards_wildPositions[position] = 0;
00403         }
00404         position++;
00405     }
00406 }

Here is the caller graph for this function:

struct wild* wildcards_getOccurences ( uint4 *  numWilds  )  [read]

Definition at line 409 of file wildcards.c.

References wild::code, wild::count, global_malloc(), uint4, wildcards_numWildCodes, wildcards_numWilds, wildcards_wildCount, and wildcards_wildPositions.

Referenced by cluster_writeClusters(), and main().

00410 {
00411         uint4 count, wildCode;
00412         struct wild* wilds;
00413 
00414     // Construct list of wilds
00415     wilds = (struct wild*)global_malloc(sizeof(struct wild) * wildcards_numWilds);
00416     count = 0; wildCode = 0;
00417     while (wildCode < wildcards_numWildCodes)
00418     {
00419                 if (wildcards_wildCount[wildCode])
00420         {
00421                         wilds[count].code = wildCode;
00422                         wilds[count].count = wildcards_wildCount[wildCode];
00423 //                      printf("(%d) %4d: ", count, wilds[count].count);
00424 //            wildcards_printWildcard(wildCode);
00425             count++;
00426         }
00427         wildCode++;
00428     }
00429 
00430     *numWilds = wildcards_numWilds;
00431 
00432     free(wildcards_wildPositions);
00433     free(wildcards_wildCount);
00434 
00435     return wilds;
00436 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct wild* wildcards_getSubset ( struct wild  wildCandidate,
struct wild *  wilds,
uint4  numWilds,
uint4 *  sizeWildSubset,
uint4 *  numOccurences 
) [read]

Definition at line 223 of file wildcards.c.

References wild::code, wild::count, global_malloc(), and uint4.

Referenced by main(), and wildcards_scoreCandidates().

00225 {
00226         struct wild *wildSubset;
00227         uint4 count = 0;
00228 
00229     // Calculate size of subset
00230     *sizeWildSubset = 0;
00231     while (count < numWilds)
00232     {
00233         if ((wilds[count].code & wildCandidate.code) == wilds[count].code)
00234                         (*sizeWildSubset)++;
00235                 count++;
00236     }
00237 
00238     // Build subset
00239         wildSubset = (struct wild*)global_malloc(sizeof(struct wild) * (*sizeWildSubset));
00240         count = 0; *sizeWildSubset = 0;
00241     while (count < numWilds)
00242     {
00243         if ((wilds[count].code & wildCandidate.code) == wilds[count].code)
00244         {
00245 //                      printf("%4d: ", wilds[count].count);
00246 //                  wildcards_printWildcard(wilds[count].code);
00247 
00248             wildSubset[*sizeWildSubset] = wilds[count];
00249                         (*sizeWildSubset)++;
00250                 }
00251         count++;
00252     }
00253 
00254     // Calculate total number of occurences of wildcard candidate
00255     count = 0;
00256     *numOccurences = 0;
00257     while (count < *sizeWildSubset)
00258     {
00259         *numOccurences += wildSubset[count].count;
00260         count++;
00261     }
00262 
00263     return wildSubset;
00264 }

Here is the call graph for this function:

Here is the caller graph for this function:

void wildcards_initialize (  ) 

Definition at line 28 of file wildcards.c.

References encoding_getCode(), clusterWildcard::letters, setbit, uint4, wildcards_clusterWildcards, wildcards_numClusterWildcards, and clusterWildcard::wildCode.

Referenced by main().

00029 {
00030         uint4 count, wildCode;
00031         unsigned char *letters, code;
00032     struct clusterWildcard *clusterWildcard;
00033 
00034     // For each of the cluster wildcards
00035     count = 0;
00036     while (count < wildcards_numClusterWildcards)
00037     {
00038         wildCode = 0;
00039                 clusterWildcard = wildcards_clusterWildcards + count;
00040 
00041         // For each code
00042         letters = clusterWildcard->letters;
00043         while (*letters != '\0')
00044         {
00045                 // Set bit in wildcode
00046                         code = encoding_getCode(*letters);
00047             setbit(wildCode, code);
00048                 letters++;
00049         }
00050 
00051         clusterWildcard->wildCode = wildCode;
00052 
00053         count++;
00054     }
00055 }

Here is the call graph for this function:

Here is the caller graph for this function:

void wildcards_initializeCountOccurences ( uint4  longestSequenceLength  ) 

Definition at line 334 of file wildcards.c.

References encoding_numLetters, global_malloc(), uint4, wildcards_numWildCodes, wildcards_numWilds, wildcards_wildCount, and wildcards_wildPositions.

Referenced by cluster_writeClusters(), and main().

00335 {
00336         uint4 position, wildCode;
00337 
00338     // Initialize array for recording wildcard position edits
00339     wildcards_wildPositions = (uint4*)global_malloc(sizeof(uint4) * longestSequenceLength);
00340         position = 0;
00341     while (position < longestSequenceLength)
00342     {
00343         wildcards_wildPositions[position] = 0;
00344         position++;
00345     }
00346 
00347     // Initialize array for count wilding occurences
00348     wildcards_numWildCodes = ceil(pow(2, encoding_numLetters));
00349     wildcards_wildCount = (uint4*)global_malloc(sizeof(uint4) * wildcards_numWildCodes);
00350     wildCode = 0;
00351     while (wildCode < wildcards_numWildCodes)
00352     {
00353                 wildcards_wildCount[wildCode] = 0;
00354         wildCode++;
00355     }
00356 
00357     wildcards_numWilds = 0;
00358 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct wild* wildcards_joinSubset ( struct wild *  set1,
uint4 *  size1,
struct wild *  set2,
uint4  size2 
) [read]

Definition at line 214 of file wildcards.c.

References global_realloc().

Referenced by wildcards_scoreCandidates().

00215 {
00216         set1 = (struct wild*)global_realloc(set1, sizeof(struct wild) * (*size1 + size2));
00217     memcpy(set1 + *size1, set2, size2 * sizeof(struct wild));
00218         (*size1) += size2;
00219         return set1;
00220 }

Here is the call graph for this function:

Here is the caller graph for this function:

void wildcards_outputWildcards ( char *  filename  ) 

Definition at line 494 of file wildcards.c.

References wild::code, wild::count, encoding_aaStartWildcards, encoding_getLetter(), encoding_numLetters, getbit, uint4, wildcards_clusterWildcards, and wildcards_numClusterWildcards.

Referenced by cluster_writeClusters(), and main().

00495 {
00496         uint4 count = 0;
00497     unsigned char code = 0;
00498     FILE* file;
00499 
00500         // Open file for writing
00501         if ((file = fopen(filename, "w")) == NULL)
00502         {
00503                 fprintf(stderr, "Error opening file %s for writing\n", filename);
00504                 exit(-1);
00505         }
00506 
00507     // Sort wildcards in increasing order of average/expected score
00508 //      qsort(wildcards_clusterWildcards, wildcards_numClusterWildcards,
00509 //          sizeof(struct clusterWildcard), wildcards_compareClusterWildcards);
00510 
00511         // For each wildcard in order of average score
00512     while (count < wildcards_numClusterWildcards)
00513     {
00514         // Print letters
00515         code = 0;
00516         while (code < encoding_numLetters)
00517         {
00518             if (getbit(wildcards_clusterWildcards[count].wildCode, code))
00519             {
00520                 fprintf(file, "%c", encoding_getLetter(code));
00521             }
00522             code++;
00523         }
00524 
00525         // Print details
00526                 fprintf(file, ",%d,[", wildcards_clusterWildcards[count].wildCode);
00527 
00528         // Print scores
00529         code = 0;
00530         while (code < encoding_aaStartWildcards)
00531         {
00532                 fprintf(file, "%d", wildcards_clusterWildcards[count].scoreMatrixRow[code]);
00533                 code++;
00534 
00535             if (code < encoding_aaStartWildcards)
00536                 fprintf(file, ",");
00537         }
00538 
00539                 fprintf(file, "],%f\n", wildcards_clusterWildcards[count].averageScore);
00540 
00541         count++;
00542     }
00543 
00544     fclose(file);
00545 }

Here is the call graph for this function:

Here is the caller graph for this function:

void wildcards_printWildcard ( uint4  wildcard  ) 

Definition at line 548 of file wildcards.c.

References wild::code, encoding_getLetter(), encoding_numLetters, getbit, and uint4.

Referenced by cluster_addChild(), and main().

00549 {
00550         uint4 code = 0;
00551 
00552     while (code < encoding_numLetters)
00553     {
00554         if (getbit(wildcard, code))
00555         {
00556                 printf("%c", encoding_getLetter(code));
00557         }
00558         code++;
00559     }
00560     printf("\n");
00561 }

Here is the call graph for this function:

Here is the caller graph for this function:

void wildcards_readWildcards ( char *  filename  ) 

Definition at line 439 of file wildcards.c.

References wild::code, wild::count, encoding_aaStartWildcards, global_malloc(), clusterWildcard::letters, uint4, wildcards_clusterWildcards, and wildcards_numClusterWildcards.

Referenced by main(), and readdb_open().

00440 {
00441     FILE* file;
00442         uint4 count = 0, letterCount;
00443     unsigned char code = 0;
00444     char* letters;
00445 
00446         // Open file for reading
00447         if ((file = fopen(filename, "r")) == NULL)
00448         {
00449                 // If this fails, simply return
00450         return;
00451         }
00452 
00453     letters = (char*)global_malloc(encoding_aaStartWildcards + 1);
00454 
00455         // For each wildcard in order of average score
00456     while (count < wildcards_numClusterWildcards)
00457     {
00458         // Read letters
00459         letterCount = 0;
00460         while (1)
00461         {
00462                 fscanf(file, "%c", letters + letterCount);
00463                         if (letters[letterCount] == ',')
00464                 break;
00465                         letterCount++;
00466         }
00467         letters[letterCount] = '\0';
00468                 wildcards_clusterWildcards[count].letters = letters;
00469 
00470         // Read details
00471                 fscanf(file, "%d,[", &(wildcards_clusterWildcards[count].wildCode));
00472 
00473         // Print scores
00474         code = 0;
00475         while (code < encoding_aaStartWildcards)
00476         {
00477                 fscanf(file, "%d", &(wildcards_clusterWildcards[count].scoreMatrixRow[code]));
00478                 code++;
00479 
00480             if (code < encoding_aaStartWildcards)
00481                 fscanf(file, ",");
00482         }
00483 
00484                 fscanf(file, "],%f\n", &(wildcards_clusterWildcards[count].averageScore));
00485 
00486         count++;
00487     }
00488 
00489     free(letters);
00490     fclose(file);
00491 }

Here is the call graph for this function:

Here is the caller graph for this function:

void wildcards_removeSubset ( struct wild *  set1,
uint4 *  size1,
struct wild *  set2,
uint4  size2,
uint4 *  numOccurences 
)

Definition at line 179 of file wildcards.c.

References wild::count, and uint4.

Referenced by wildcards_scoreCandidates().

00181 {
00182         uint4 newSize1, count1, count2, match;
00183 
00184     // For each wild in set1
00185     *numOccurences = 0;
00186     count1 = 0; newSize1 = 0;
00187     while (count1 < *size1)
00188     {
00189         // Determine if also occurs in set2
00190         match = 0; count2 = 0;
00191         while (count2 < size2)
00192         {
00193                         if (set1[count1].code == set2[count2].code)
00194                 match = 1;
00195                 count2++;
00196         }
00197 
00198         // Only keep if does not appear in set2
00199         if (!match)
00200         {
00201                 set1[newSize1] = set1[count1];
00202             newSize1++;
00203 
00204             *numOccurences += set1[count1].count;
00205         }
00206 
00207         count1++;
00208     }
00209 
00210     *size1 = newSize1;
00211 }

Here is the caller graph for this function:

float wildcards_scoreCandidates ( struct wild *  wildCandidates,
uint4  numWildCandidates,
struct wild *  wilds,
uint4  numWilds,
float  defaultWildscore 
)

Definition at line 99 of file wildcards.c.

References clusterWildcard::averageScore, wild::code, encoding_numLetters, global_malloc(), int2, uint4, wildcards_averageResidueWildMatch(), wildcards_clusterWildcards, wildcards_getSubset(), wildcards_joinSubset(), wildcards_removeSubset(), wildcards_scoreMatrixRow, and clusterWildcard::wildCode.

Referenced by cluster_writeClusters(), and main().

00101 {
00102         struct wild* wildSubset, wildCandidate, *pastSubsets = NULL, *wildCandidates;
00103     uint4 sizeWildSubset, sizePastSubsets = 0;
00104         float score, totalSaving = 0;
00105     uint4 numOccurences, candidateNum = 0;
00106 
00107     wildCandidates = (struct wild*)global_malloc(sizeof(struct wild) * numWildCandidates);
00108     memcpy(wildCandidates, originalWildCandidates, sizeof(struct wild) * numWildCandidates);
00109 
00110     // Sort wild candidates
00111 //      qsort(wildCandidates, numWildCandidates, sizeof(struct wild), wildcards_compareWilds);
00112 
00113     // For each wild candidate from shortest to longest
00114     while (candidateNum < numWildCandidates)
00115     {
00116         wildCandidate = wildCandidates[candidateNum];
00117 //        wildcards_printWildcard(wildCandidate.code);
00118 
00119         // Get subset of candidate wildcard
00120         wildSubset = wildcards_getSubset(wildCandidate, wilds, numWilds, &sizeWildSubset, &numOccurences);
00121 
00122         // Remove already process wildcards from subset
00123         wildcards_removeSubset(wildSubset, &sizeWildSubset, pastSubsets, sizePastSubsets, &numOccurences);
00124 
00125 //        printf("wildSubset=%d\n", sizeWildSubset); fflush(stdout);
00126         score = wildcards_averageResidueWildMatch(wildCandidate, wildSubset, sizeWildSubset);
00127 //        printf("Average score=%f occurences=%d defScore=%f\n", score, numOccurences, defaultWildscore);
00128 //        printf("Total saving: %f\n", (defaultWildscore - score) * numOccurences);
00129         totalSaving += (defaultWildscore - score) * numOccurences;
00130 
00131         // Add to past subsets
00132         pastSubsets = wildcards_joinSubset(pastSubsets, &sizePastSubsets,
00133                                              wildSubset, sizeWildSubset);
00134 
00135                 // Update cluster wildcards
00136                 memcpy(wildcards_clusterWildcards[candidateNum].scoreMatrixRow, wildcards_scoreMatrixRow,
00137                sizeof(int2) * encoding_numLetters);
00138 
00139                 wildcards_clusterWildcards[candidateNum].averageScore = score;
00140                 wildcards_clusterWildcards[candidateNum].wildCode = wildCandidate.code;
00141 
00142         free(wildSubset);
00143 
00144         candidateNum++;
00145         }
00146 
00147 /*    printf("Processed wildcards:\n");
00148     count = 0;
00149     while (count < sizePastSubsets)
00150     {
00151                 wildcards_printWildcard(pastSubsets[count].code);
00152         count++;
00153     }*/
00154 
00155     free(pastSubsets);
00156     free(wildCandidates);
00157 
00158     return totalSaving;
00159 }

Here is the call graph for this function:

Here is the caller graph for this function:

float wildcards_scoreResidueWildMatch ( struct wild  wildCandidate,
struct wild *  wilds,
uint4  numWilds,
uint4  code 
)

Definition at line 289 of file wildcards.c.

References wild::code, constants_sentinalScore, wild::count, encoding_numLetters, getbit, int4, scoreMatrix::matrix, wildcards_scoreMatrix, and wildcards_scoringConstant.

Referenced by wildcards_averageResidueWildMatch().

00291 {
00292         int4 wildCount = 0;
00293     int4 wildResidue, bestScore;
00294     float averageScore, totalScore = 0, totalCount = 0;
00295 
00296     // For each wild
00297     while (wildCount < numWilds)
00298     {
00299                 // Determine the highest scoring matching residue in this wild
00300         bestScore = constants_sentinalScore;
00301                 wildResidue = 0;
00302         while (wildResidue < encoding_numLetters)
00303         {
00304                         if (getbit(wilds[wildCount].code, wildResidue) &&
00305                 wildcards_scoreMatrix.matrix[code][wildResidue] > bestScore)
00306             {
00307                 bestScore = wildcards_scoreMatrix.matrix[code][wildResidue];
00308             }
00309 
00310                 wildResidue++;
00311         }
00312 //              wildcards_printWildcard(wilds[wildCount].code);
00313 //        printf("[%d,%d]\n", wilds[wildCount].count, bestScore);
00314 
00315                 totalScore += ((int4)wilds[wildCount].count * bestScore);
00316         totalCount += wilds[wildCount].count;
00317 
00318         wildCount++;
00319     }
00320 
00321     averageScore = (float)totalScore / (float)totalCount;
00322 
00323     // If code is included in wildcandidate, adjust score
00324     if (getbit(wildCandidate.code, code))
00325     {
00326         averageScore = wildcards_scoringConstant * wildcards_scoreMatrix.matrix[code][code]
00327                      + (1 - wildcards_scoringConstant) * averageScore;
00328     }
00329 
00330     return averageScore;
00331 }

Here is the caller graph for this function:


Variable Documentation

struct clusterWildcard wildcards_clusterWildcards[wildcards_numClusterWildcards]

Definition at line 16 of file wildcards.c.

Referenced by cluster_addChild(), cluster_score(), cluster_writeClusters(), scoreMatrix_load(), wildcards_initialize(), wildcards_outputWildcards(), wildcards_readWildcards(), and wildcards_scoreCandidates().

int wildcards_printWilds

Definition at line 11 of file wildcards.c.

struct scoreMatrix wildcards_scoreMatrix

Definition at line 10 of file wildcards.c.

Referenced by cluster_averageWildcodeScore(), main(), and wildcards_scoreResidueWildMatch().

int2 wildcards_scoreMatrixRow[encoding_aaStartWildcards]

Definition at line 13 of file wildcards.c.

Referenced by wildcards_averageResidueWildMatch(), and wildcards_scoreCandidates().

float wildcards_scoringConstant

Definition at line 12 of file wildcards.c.

Referenced by main(), and wildcards_scoreResidueWildMatch().


Generated on Wed Dec 19 20:53:08 2007 for fsa-blast by  doxygen 1.5.2