00001
00002
00003
00004 #include "common.h"
00005 #include "chain.h"
00006 #include "axt.h"
00007 #include "gapCalc.h"
00008 #include "chainConnect.h"
00009
00010 static char const rcsid[] = "$Id: chainConnect.c,v 1.3 2005/08/07 23:25:26 baertsch Exp $";
00011
00012 double chainScoreBlock(char *q, char *t, int size, int matrix[256][256])
00013
00014 {
00015 double score = 0;
00016 int i;
00017 for (i=0; i<size; ++i)
00018 score += matrix[(int)q[i]][(int)t[i]];
00019 return score;
00020 }
00021
00022 double chainCalcScore(struct chain *chain, struct axtScoreScheme *ss,
00023 struct gapCalc *gapCalc, struct dnaSeq *query, struct dnaSeq *target)
00024
00025 {
00026 struct cBlock *b1, *b2;
00027 double score = 0;
00028 for (b1 = chain->blockList; b1 != NULL; b1 = b2)
00029 {
00030 score += chainScoreBlock(query->dna + b1->qStart,
00031 target->dna + b1->tStart, b1->tEnd - b1->tStart, ss->matrix);
00032 b2 = b1->next;
00033 if (b2 != NULL)
00034 score -= gapCalcCost(gapCalc, b2->qStart - b1->qEnd,
00035 b2->tStart - b1->tEnd);
00036 }
00037 return score;
00038 }
00039
00040 double chainCalcScoreSubChain(struct chain *chain, struct axtScoreScheme *ss,
00041 struct gapCalc *gapCalc, struct dnaSeq *query, struct dnaSeq *target)
00042
00043
00044 {
00045 struct cBlock *b1, *b2;
00046 double score = 0;
00047 for (b1 = chain->blockList; b1 != NULL; b1 = b2)
00048 {
00049 score += chainScoreBlock(query->dna + (b1->qStart) - chain->qStart,
00050 target->dna + (b1->tStart) - chain->tStart, b1->tEnd - b1->tStart, ss->matrix);
00051 b2 = b1->next;
00052 if (b2 != NULL)
00053 score -= gapCalcCost(gapCalc, b2->qStart - b1->qEnd,
00054 b2->tStart - b1->tEnd);
00055 }
00056 return score;
00057 }
00058
00059 void cBlockFindCrossover(struct cBlock *left, struct cBlock *right,
00060 struct dnaSeq *qSeq, struct dnaSeq *tSeq,
00061 int overlap, int matrix[256][256], int *retPos, int *retScoreAdjustment)
00062
00063
00064
00065
00066
00067 {
00068 int bestPos = 0;
00069 char *rqStart = qSeq->dna + right->qStart;
00070 char *lqStart = qSeq->dna + left->qEnd - overlap;
00071 char *rtStart = tSeq->dna + right->tStart;
00072 char *ltStart = tSeq->dna + left->tEnd - overlap;
00073 int i;
00074 double score, bestScore, rScore, lScore;
00075
00076
00077 if (overlap > (left->tEnd - left->tStart) ||
00078 overlap > (right->tEnd - right->tStart))
00079 errAbort("overlap is %d -- too large for one of these:\n"
00080 "qSize=%d tSize=%d\n"
00081 "left: qStart=%d qEnd=%d (%d) tStart=%d tEnd=%d (%d)\n"
00082 "right: qStart=%d qEnd=%d (%d) tStart=%d tEnd=%d (%d)\n",
00083 overlap, qSeq->size, tSeq->size,
00084 left->qStart, left->qEnd, left->qEnd - left->qStart,
00085 left->tStart, left->tEnd, left->tEnd - left->tStart,
00086 right->qStart, right->qEnd, right->qEnd - right->qStart,
00087 right->tStart, right->tEnd, right->tEnd - right->tStart);
00088
00089 score = bestScore = rScore = chainScoreBlock(rqStart, rtStart, overlap, matrix);
00090 lScore = chainScoreBlock(lqStart, ltStart, overlap, matrix);
00091 for (i=0; i<overlap; ++i)
00092 {
00093 score += matrix[(int)lqStart[i]][(int)ltStart[i]];
00094 score -= matrix[(int)rqStart[i]][(int)rtStart[i]];
00095 if (score > bestScore)
00096 {
00097 bestScore = score;
00098 bestPos = i+1;
00099 }
00100 }
00101 *retPos = bestPos;
00102 *retScoreAdjustment = rScore + lScore - bestScore;
00103 }
00104
00105
00106 int chainConnectGapCost(int dq, int dt, struct chainConnect *cc)
00107
00108 {
00109 return gapCalcCost(cc->gapCalc, dq, dt);
00110 }
00111
00112 int chainConnectCost(struct cBlock *a, struct cBlock *b,
00113 struct chainConnect *cc)
00114
00115
00116 {
00117 int dq = b->qStart - a->qEnd;
00118 int dt = b->tStart - a->tEnd;
00119 int overlapAdjustment = 0;
00120
00121 if (a->qStart >= b->qStart || a->tStart >= b->tStart)
00122 {
00123 errAbort("a (%d %d) not strictly before b (%d %d)",
00124 a->qStart, a->tStart, b->qStart, b->tStart);
00125 }
00126 if (dq < 0 || dt < 0)
00127 {
00128 int bSize = b->qEnd - b->qStart;
00129 int aSize = a->qEnd - a->qStart;
00130 int overlap = -min(dq, dt);
00131 int crossover;
00132 if (overlap >= bSize || overlap >= aSize)
00133 {
00134
00135
00136 overlapAdjustment = 100000000;
00137 }
00138 else
00139 {
00140 cBlockFindCrossover(a, b, cc->query, cc->target, overlap,
00141 cc->ss->matrix, &crossover, &overlapAdjustment);
00142 dq += overlap;
00143 dt += overlap;
00144 }
00145 }
00146 return overlapAdjustment + gapCalcCost(cc->gapCalc, dq, dt);
00147 }
00148
00149 static void checkStartBeforeEnd(struct chain *chain, char *message)
00150
00151 {
00152 struct cBlock *b;
00153 for (b = chain->blockList; b != NULL; b = b->next)
00154 {
00155 if (b->qStart >= b->qEnd || b->tStart >= b->tEnd)
00156 errAbort("Start after end in (%d %d) to (%d %d) %s",
00157 b->qStart, b->tStart, b->qEnd, b->tEnd, message);
00158 }
00159 }
00160
00161 static void checkChainGaps(struct chain *chain, char *message)
00162
00163 {
00164 struct cBlock *a, *b;
00165 a = chain->blockList;
00166 if (a == NULL)
00167 return;
00168 for (b = a->next; b != NULL; b = b->next)
00169 {
00170 if (a->qEnd > b->qStart || a->tEnd > b->tStart)
00171 {
00172 errAbort("Negative gap between (%d %d - %d %d) and (%d %d - %d %d) %s",
00173 a->qStart, a->tStart, a->qEnd, a->tEnd,
00174 b->qStart, b->tStart, b->qEnd, b->tEnd, message);
00175 }
00176 a = b;
00177 }
00178 }
00179
00180 static void checkChainIncreases(struct chain *chain, char *message)
00181
00182
00183 {
00184 struct cBlock *a, *b;
00185 a = chain->blockList;
00186 if (a == NULL)
00187 return;
00188 for (b = a->next; b != NULL; b = b->next)
00189 {
00190 if (a->qStart >= b->qStart || a->tStart >= b->tStart)
00191 {
00192 errAbort("a (%d %d) not before b (%d %d) %s",
00193 a->qStart, a->tStart, b->qStart, b->tStart, message);
00194 }
00195 a = b;
00196 }
00197 }
00198
00199 void chainCalcBounds(struct chain *chain)
00200
00201
00202 {
00203 struct cBlock *b = chain->blockList;
00204 if (chain->blockList == NULL)
00205 return;
00206 chain->qStart = b->qStart;
00207 chain->tStart = b->tStart;
00208 b = slLastEl(chain->blockList);
00209 chain->qEnd = b->qEnd;
00210 chain->tEnd = b->tEnd;
00211 }
00212
00213 static boolean removeNegativeBlocks(struct chain *chain)
00214
00215
00216
00217
00218 {
00219 struct cBlock *newList = NULL, *b, *next;
00220 boolean gotNeg = FALSE;
00221 for (b = chain->blockList; b != NULL; b = next)
00222 {
00223 next = b->next;
00224 if (b->qStart >= b->qEnd || b->tStart >= b->tEnd)
00225 {
00226 gotNeg = TRUE;
00227 freeMem(b);
00228 }
00229 else
00230 {
00231 slAddHead(&newList, b);
00232 }
00233 }
00234 slReverse(&newList);
00235 chain->blockList = newList;
00236 if (gotNeg)
00237 chainCalcBounds(chain);
00238 return gotNeg;
00239 }
00240
00241 void setChainBounds(struct chain *chain)
00242
00243 {
00244 struct cBlock *b = chain->blockList;
00245 chain->qStart = b->qStart;
00246 chain->tStart = b->tStart;
00247 while (b->next != NULL)
00248 b = b->next;
00249 chain->qEnd = b->qEnd;
00250 chain->tEnd = b->tEnd;
00251 }
00252
00253 void chainRemovePartialOverlaps(struct chain *chain,
00254 struct dnaSeq *qSeq, struct dnaSeq *tSeq, int matrix[256][256])
00255
00256 {
00257 struct cBlock *a, *b;
00258 boolean totalTrimA, totalTrimB;
00259
00260 assert(chain->blockList != NULL);
00261
00262
00263
00264 checkChainIncreases(chain, "before removePartialOverlaps");
00265
00266
00267
00268
00269
00270
00271
00272 for (;;)
00273 {
00274 totalTrimA = totalTrimB = FALSE;
00275 a = chain->blockList;
00276 b = a->next;
00277 for (;;)
00278 {
00279 int dq, dt;
00280 if (b == NULL)
00281 break;
00282 dq = b->qStart - a->qEnd;
00283 dt = b->tStart - a->tEnd;
00284 if (dq < 0 || dt < 0)
00285 {
00286 int overlap = -min(dq, dt);
00287 int aSize = a->qEnd - a->qStart;
00288 int bSize = b->qEnd - b->qStart;
00289 int crossover, invCross, overlapAdjustment;
00290 if (overlap >= aSize || overlap >= bSize)
00291 {
00292 totalTrimB = TRUE;
00293 }
00294 else
00295 {
00296 cBlockFindCrossover(a, b, qSeq, tSeq, overlap, matrix,
00297 &crossover, &overlapAdjustment);
00298 b->qStart += crossover;
00299 b->tStart += crossover;
00300 invCross = overlap - crossover;
00301 a->qEnd -= invCross;
00302 a->tEnd -= invCross;
00303 if (b->qEnd <= b->qStart)
00304 {
00305 totalTrimB = TRUE;
00306 }
00307 else if (a->qEnd <= a->qStart)
00308 {
00309 totalTrimA = TRUE;
00310 }
00311 }
00312 }
00313 if (totalTrimA == TRUE)
00314 {
00315 removeNegativeBlocks(chain);
00316 break;
00317 }
00318 else if (totalTrimB == TRUE)
00319 {
00320 b = b->next;
00321 freez(&a->next);
00322 a->next = b;
00323 totalTrimB = FALSE;
00324 }
00325 else
00326 {
00327 a = b;
00328 b = b->next;
00329 }
00330 }
00331 if (!totalTrimA)
00332 break;
00333 }
00334
00335
00336
00337 setChainBounds(chain);
00338
00339
00340 checkChainGaps(chain, "after removePartialOverlaps");
00341 checkStartBeforeEnd(chain, "after removePartialOverlaps");
00342 }
00343
00344 void chainMergeAbutting(struct chain *chain)
00345
00346
00347 {
00348 struct cBlock *newList = NULL, *b, *last = NULL, *next;
00349 for (b = chain->blockList; b != NULL; b = next)
00350 {
00351 next = b->next;
00352 if (last == NULL || last->qEnd != b->qStart || last->tEnd != b->tStart)
00353 {
00354 slAddHead(&newList, b);
00355 last = b;
00356 }
00357 else
00358 {
00359 last->qEnd = b->qEnd;
00360 last->tEnd = b->tEnd;
00361 freeMem(b);
00362 }
00363 }
00364 slReverse(&newList);
00365 chain->blockList = newList;
00366 }
00367