This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
Data Structures | |
| struct | chunk |
| struct | maskRegion |
Functions | |
| void | dust_dustSequence (char *originalSequence) |
| void | dust_processChunk (int windowLength, unsigned char *sequence, int chunkStart, struct chunk *chunk) |
| void | dust_processWindow (int windowLength, int windowStart, struct chunk *chunk, unsigned char *sequence) |
void dust_dustSequence(char *originalSequence)
Definition at line 12 of file dust.c.
References dust_processWindow(), encoding_encodeSequence(), encoding_nucleotide, encoding_numRegularLetters, encoding_randomEncodedLetter(), chunk::end, maskRegion::from, global_malloc(), chunk::score, chunk::start, and maskRegion::to.
Referenced by main().
00013 { 00014 int windowStart; 00015 unsigned char *sequence; 00016 int sequenceLength, windowLength, count; 00017 int cutoffScore = 20; 00018 int windowSize = 64; 00019 int minimumRegionSize = 4; 00020 int linker = 1; 00021 int windowhalf = windowSize / 2; 00022 struct chunk chunk; 00023 struct maskRegion *regions = NULL, *currentRegion = NULL; 00024 00025 sequenceLength = strlen(originalSequence); 00026 00027 sequence = (unsigned char*)global_malloc(sequenceLength + 1); 00028 strcpy(sequence, originalSequence); 00029 00030 // Convert sequence into encoded format 00031 encoding_encodeSequence(sequence, sequenceLength, encoding_nucleotide); 00032 00033 // Replace wildcards in the sequence 00034 count = 0; 00035 while (count < sequenceLength - 2) 00036 { 00037 // If a wild 00038 if (sequence[count] >= encoding_numRegularLetters) 00039 { 00040 // Code replacement 00041 sequence[count] = encoding_randomEncodedLetter(sequence[count]); 00042 } 00043 count++; 00044 } 00045 00046 // Convert sequence into encoded triplets 00047 count = 0; 00048 while (count < sequenceLength - 2) 00049 { 00050 // Encode triplet 00051 sequence[count] = (sequence[count] << 4) | (sequence[count + 1] << 2) | sequence[count + 2]; 00052 00053 count++; 00054 } 00055 00056 // Slide a window along the sequence 00057 for (windowStart = 0; windowStart < sequenceLength; windowStart += windowhalf) 00058 { 00059 windowLength = (int)((sequenceLength > windowStart+windowSize) ? 
windowSize : sequenceLength - windowStart); 00060 windowLength -= 2; 00061 00062 // printf("process window (length=%d, position=%d)\n", windowLength, windowStart); fflush(stdout); 00063 dust_processWindow(windowLength, windowStart, &chunk, sequence); 00064 00065 // printf("Chunk start=%d end=%d score=%d\n", chunk.start, chunk.end, chunk.score); 00066 // fflush(stdout); 00067 00068 // Ignore chunks that are smaller than the minimum 00069 if ((chunk.end - chunk.start + 1) < minimumRegionSize) 00070 { 00071 continue; 00072 } 00073 00074 if (chunk.score > cutoffScore) 00075 { 00076 // If this region can be linked to previous (they are close to each other) 00077 if (regions && regions->to + linker >= chunk.start + windowStart && 00078 regions->from <= chunk.start + windowStart) 00079 { 00080 // Extend previous region 00081 regions->to = chunk.end + windowStart; 00082 } 00083 else 00084 { 00085 // Add new region to start of linked list 00086 currentRegion = (struct maskRegion*)global_malloc(sizeof(struct maskRegion)); 00087 currentRegion->from = chunk.start + windowStart; 00088 currentRegion->to = chunk.end + windowStart; 00089 currentRegion->next = regions; 00090 regions = currentRegion; 00091 } 00092 if (chunk.end < windowhalf) 00093 { 00094 // Advance next window to end of chunk 00095 windowStart += (chunk.end - windowhalf); 00096 } 00097 } 00098 } 00099 00100 free(sequence); 00101 00102 // For each region 00103 currentRegion = regions; 00104 while (currentRegion != NULL) 00105 { 00106 // Mask it using N's 00107 count = currentRegion->from; 00108 while (count <= currentRegion->to) 00109 { 00110 originalSequence[count] = 'n'; 00111 count++; 00112 } 00113 00114 // printf("Start=%d End=%d\n", currentRegion->from, currentRegion->to); 00115 currentRegion = currentRegion->next; 00116 } 00117 }
Here is the call graph for this function:

Here is the caller graph for this function:

void dust_processChunk(int windowLength, unsigned char *sequence, int chunkStart, struct chunk *chunk)
Definition at line 144 of file dust.c.
References chunk::end, chunk::score, and chunk::start.
Referenced by dust_processWindow().
00145 { 00146 unsigned int sum; 00147 int position, triplet, numOccurrences; 00148 int newScore; 00149 int occurrences[256]; 00150 00151 // Initialize triplet occurrences to zero 00152 triplet = 0; 00153 while (triplet < 64) 00154 { 00155 occurrences[triplet] = 0; 00156 triplet++; 00157 } 00158 00159 sum = 0; 00160 newScore = 0; 00161 00162 // For each triplet in the sequence 00163 for (position = 0; position < windowLength; position++) 00164 { 00165 if (*sequence != 255) 00166 { 00167 // Increment counter of its occurance 00168 numOccurrences = occurrences[*sequence]; 00169 00170 // If it has occured more than one 00171 if (numOccurrences) 00172 { 00173 // Calculate score 00174 sum += numOccurrences; 00175 newScore = 10 * sum / position; 00176 00177 // If the best score yet 00178 if (newScore > chunk->score) 00179 { 00180 // Record the start and end of this high-scoring region 00181 chunk->score = newScore; 00182 chunk->start = chunkStart; 00183 chunk->end = position + 2; 00184 } 00185 } 00186 occurrences[*sequence]++; 00187 } 00188 sequence++; 00189 // printf("[%d]", *occurrencesptr); 00190 } 00191 }
Here is the caller graph for this function:

void dust_processWindow(int windowLength, int windowStart, struct chunk *chunk, unsigned char *sequence)
Definition at line 120 of file dust.c.
References dust_processChunk(), chunk::end, chunk::score, and chunk::start.
Referenced by dust_dustSequence().
00121 { 00122 int chunkStart; 00123 00124 // Initialize best chunk 00125 chunk->score = 0; 00126 chunk->start = 0; 00127 chunk->end = 0; 00128 00129 // Get window of the sequence 00130 sequence += windowStart; 00131 00132 // Perform dust on each chunk in the window 00133 for (chunkStart = 0; chunkStart < windowLength; chunkStart++) 00134 { 00135 // printf("wo1 (%d,%d)\n", windowLength-i, i); 00136 dust_processChunk(windowLength - chunkStart, sequence + chunkStart, chunkStart, chunk); 00137 } 00138 00139 // Update chunk end 00140 chunk->end += chunk->start; 00141 }
Here is the call graph for this function:

Here is the caller graph for this function:

1.5.2