00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "common.h"
00015 #include "dnaseq.h"
00016 #include "axt.h"
00017 #include "fuzzyFind.h"
00018 #include "localmem.h"
00019 #include "bandExt.h"
00020
00021 static char const rcsid[] = "$Id: bandExt.c,v 1.13 2006/03/14 19:02:31 angie Exp $";
00022
00023
00024
00025
00026
00027
00028
00029
00030 #define mpMatch 1
00031 #define mpUp 2
00032 #define mpLeft 3
00033 #define mpMask 3
00034 #define upExt (1<<2)
00035 #define lpExt (1<<3)
00036
00037 struct score
00038
00039 {
00040 int match;
00041 int up;
00042 int left;
00043 };
00044
00045 boolean bandExt(boolean global, struct axtScoreScheme *ss, int maxInsert,
00046 char *aStart, int aSize, char *bStart, int bSize, int dir,
00047 int symAlloc, int *retSymCount, char *retSymA, char *retSymB,
00048 int *retStartA, int *retStartB)
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058 {
00059 int i;
00060 int *bOffsets = NULL;
00061 UBYTE **parents = NULL;
00062 struct score *curScores = NULL;
00063 struct score *prevScores = NULL;
00064 struct score *swapScores;
00065 int bandSize = 2*maxInsert + 1;
00066 int maxIns1 = maxInsert + 1;
00067 int bandPlus = bandSize + 2*maxIns1;
00068 int bestScore = 0;
00069 int aPos, aBestPos = -1;
00070 int bPos, bBestPos = -1;
00071 int bandCenter = 0;
00072 int colShift = 1;
00073 int prevScoreOffset;
00074 int curScoreOffset;
00075 int symCount = 0;
00076 int gapOpen = ss->gapOpen;
00077 int gapExtend = ss->gapExtend;
00078 int badScore = -gapOpen*100;
00079 int maxDrop = gapOpen + gapExtend*maxInsert;
00080 int midScoreOff;
00081 struct lm *lm;
00082 boolean didExt = FALSE;
00083 int initGapScore = -gapOpen;
00084
00085
00086
00087
00088 if (dir < 0)
00089 {
00090 reverseBytes(aStart, aSize);
00091 reverseBytes(bStart, bSize);
00092 }
00093 #ifdef DEBUG
00094 uglyf("bandExt: dir %d, aStart %d, aSize %d, symAlloc %d\n", dir,
00095 aSize, bSize, symAlloc);
00096 mustWrite(uglyOut, aStart, aSize);
00097 uglyf("\n");
00098 mustWrite(uglyOut, bStart, bSize);
00099 uglyf("\n");
00100 #endif
00101
00102
00103
00104
00105 lm = lmInit(
00106 sizeof(bOffsets[0])*aSize +
00107 sizeof(parents[0])*bandSize +
00108 bandSize*(sizeof(parents[0][0])*aSize) +
00109 sizeof(curScores[0]) * bandPlus +
00110 sizeof(prevScores[0]) * bandPlus );
00111
00112
00113 lmAllocArray(lm, bOffsets, aSize);
00114 lmAllocArray(lm, parents, bandSize);
00115 for (i=0; i<bandSize; ++i)
00116 lmAllocArray(lm, parents[i], aSize);
00117 lmAllocArray(lm, curScores, bandPlus);
00118 lmAllocArray(lm, prevScores, bandPlus);
00119
00120
00121
00122
00123 for (i=0; i<bandPlus; ++i)
00124 {
00125 struct score *cs = &curScores[i];
00126 cs->match = cs->up = cs->left = badScore;
00127 cs = &prevScores[i];
00128 cs->match = cs->up = cs->left = badScore;
00129 }
00130
00131
00132
00133 midScoreOff = 1 + 2 * maxInsert;
00134 prevScores[midScoreOff].match = 0;
00135
00136
00137
00138 {
00139 int score = -gapOpen;
00140 for (i=0; i<maxInsert; ++i)
00141 {
00142 prevScores[midScoreOff+i].up = score;
00143 score -= gapExtend;
00144 }
00145 }
00146
00147 for (aPos=0; aPos < aSize; ++aPos)
00148 {
00149 char aBase = aStart[aPos];
00150 int *matRow = ss->matrix[(int)aBase];
00151 int bestColScore = badScore;
00152 int bestColPos = -1;
00153 int colTop = bandCenter - maxInsert;
00154 int colBottom = bandCenter + maxIns1;
00155
00156 if (colTop < 0) colTop = 0;
00157 if (colBottom > bSize) colBottom = bSize;
00158 curScoreOffset = maxIns1 + colTop - (bandCenter - maxInsert);
00159 prevScoreOffset = curScoreOffset + colShift;
00160 #ifdef DEBUG
00161 uglyf("curScoreOffset %d, maxIns %d, prevScoreOffset %d\n", curScoreOffset, maxInsert, prevScoreOffset);
00162 #endif
00163
00164
00165
00166 if (aPos < maxInsert)
00167 {
00168 curScores[curScoreOffset-1].up = initGapScore;
00169 initGapScore -= gapExtend;
00170 }
00171 else
00172 curScores[curScoreOffset-1].up = badScore;
00173
00174 for (bPos = colTop; bPos < colBottom; ++bPos)
00175 {
00176 UBYTE parent;
00177 struct score *curScore = &curScores[curScoreOffset];
00178
00179 #ifdef DEBUG
00180 uglyf("aPos %d, bPos %d, %c vs %c\n", aPos, bPos, aBase, bStart[bPos]);
00181 uglyf(" diag[%d %d %d], left[%d %d %d], up[%d %d %d]\n",
00182 prevScores[prevScoreOffset-1].match,
00183 prevScores[prevScoreOffset-1].left,
00184 prevScores[prevScoreOffset-1].up,
00185 prevScores[prevScoreOffset].match,
00186 prevScores[prevScoreOffset].left,
00187 prevScores[prevScoreOffset].up,
00188 curScores[curScoreOffset-1].match,
00189 curScores[curScoreOffset-1].left,
00190 curScores[curScoreOffset-1].up);
00191 #endif
00192
00193
00194
00195 {
00196 int match = matRow[(int)bStart[bPos]];
00197 struct score *a = &prevScores[prevScoreOffset-1];
00198 int diagScore = a->match;
00199 int leftScore = a->left;
00200 int upScore = a->up;
00201 int score;
00202 if (diagScore >= leftScore && diagScore >= upScore)
00203 {
00204 score = diagScore + match;
00205 parent = mpMatch;
00206 }
00207 else if (leftScore > upScore)
00208 {
00209 score = leftScore + match;
00210 parent = mpLeft;
00211 }
00212 else
00213 {
00214 score = upScore + match;
00215 parent = mpUp;
00216 }
00217 curScore->match = score;
00218 if (score > bestColScore)
00219 {
00220 bestColScore = score;
00221 bestColPos = bPos;
00222 }
00223 }
00224
00225
00226 {
00227 struct score *a = &prevScores[prevScoreOffset];
00228 int extScore = a->left - gapExtend;
00229 int openScore = a->match - gapOpen;
00230 if (extScore >= openScore)
00231 {
00232 parent |= lpExt;
00233 curScore->left = extScore;
00234 }
00235 else
00236 {
00237 curScore->left = openScore;
00238 }
00239 }
00240
00241
00242 {
00243 struct score *a = &curScore[-1];
00244 int extScore = a->up - gapExtend;
00245 int openScore = a->match - gapOpen;
00246 if (extScore >= openScore)
00247 {
00248 parent |= upExt;
00249 curScore->up = extScore;
00250 }
00251 else
00252 {
00253 curScore->up = openScore;
00254 }
00255 }
00256
00257 parents[curScoreOffset - maxIns1][aPos] = parent;
00258
00259 #ifdef DEBUG
00260 uglyf(" cur [%d %d %d]\n",
00261 curScore->match,
00262 curScore->left,
00263 curScore->up);
00264 #endif
00265
00266 curScoreOffset += 1;
00267 prevScoreOffset += 1;
00268 }
00269
00270
00271
00272
00273
00274
00275
00276
00277 if (bestScore < bestColScore)
00278 {
00279 bestScore = bestColScore;
00280 aBestPos = aPos;
00281 bBestPos = bestColPos;
00282 colShift = (bestColPos + 1) - bandCenter;
00283 }
00284 else if (bestColScore < bestScore - maxDrop)
00285 {
00286 if (!global)
00287 break;
00288 }
00289 else
00290 {
00291 colShift = 1;
00292 }
00293
00294
00295
00296 bOffsets[aPos] = bandCenter;
00297 bandCenter += colShift;
00298
00299
00300 swapScores = curScores;
00301 curScores = prevScores;
00302 prevScores = swapScores;
00303 }
00304
00305
00306
00307 #ifdef DEBUG
00308 uglyf("traceback: bestScore %d, aBestPos %d, bBestPos %d\n", bestScore, aBestPos, bBestPos);
00309 #endif
00310 if (global || bestScore > 0)
00311 {
00312 boolean upState = FALSE;
00313 boolean leftState = FALSE;
00314 didExt = TRUE;
00315 if (global)
00316 {
00317 aPos = aSize-1;
00318 bPos = bSize-1;
00319 }
00320 else
00321 {
00322 aPos = aBestPos;
00323 bPos = bBestPos;
00324 }
00325 for (;;)
00326 {
00327 int pOffset;
00328 UBYTE parent;
00329 pOffset = bPos - bOffsets[aPos] + maxInsert;
00330 if (pOffset < 0) pOffset = 0;
00331
00332
00333
00334
00335
00336
00337 if (pOffset >= bandSize)
00338 {
00339 #ifdef DEBUG
00340 uglyf("bPos %d, aPos %d, bOffsets[aPos] %d, maxInsert %d\n", bPos, aPos, bOffsets[aPos], maxInsert);
00341 uglyf("pOffset = %d, bandSize = %d\n", pOffset, bandSize);
00342 uglyf("aSize %d, bSize %d\n", aSize, bSize);
00343 faWriteNext(uglyOut, "uglyA", aStart, aSize);
00344 faWriteNext(uglyOut, "uglyB", bStart, bSize);
00345 #endif
00346 assert(global);
00347 return FALSE;
00348 }
00349 parent = parents[pOffset][aPos];
00350 #ifdef DEBUG
00351 uglyf("aPos %d, bPos %d, parent %d, pMask %d upState %d, leftState %d\n", aPos, bPos, parent, parent & mpMask, upState, leftState);
00352 #endif
00353 if (upState)
00354 {
00355 retSymA[symCount] = '-';
00356 retSymB[symCount] = bStart[bPos];
00357 bPos -= 1;
00358 upState = (parent&upExt);
00359 }
00360 else if (leftState)
00361 {
00362 retSymA[symCount] = aStart[aPos];
00363 retSymB[symCount] = '-';
00364 aPos -= 1;
00365 leftState = (parent&lpExt);
00366 }
00367 else
00368 {
00369 retSymA[symCount] = aStart[aPos];
00370 retSymB[symCount] = bStart[bPos];
00371 aPos -= 1;
00372 bPos -= 1;
00373 parent &= mpMask;
00374 if (parent == mpUp)
00375 upState = TRUE;
00376 else if (parent == mpLeft)
00377 leftState = TRUE;
00378 }
00379 if (++symCount >= symAlloc)
00380 errAbort("unsufficient symAlloc in bandExt");
00381 if (aPos < 0 || bPos < 0)
00382 {
00383 while (aPos >= 0)
00384 {
00385 retSymA[symCount] = aStart[aPos];
00386 retSymB[symCount] = '-';
00387 if (++symCount >= symAlloc)
00388 errAbort("unsufficient symAlloc in bandExt");
00389 --aPos;
00390 }
00391 while (bPos >= 0)
00392 {
00393 retSymA[symCount] = '-';
00394 retSymB[symCount] = bStart[bPos];
00395 if (++symCount >= symAlloc)
00396 errAbort("unsufficient symAlloc in bandExt");
00397 --bPos;
00398 }
00399 break;
00400 }
00401 }
00402 if (dir > 0)
00403 {
00404 reverseBytes(retSymA, symCount);
00405 reverseBytes(retSymB, symCount);
00406 }
00407 retSymA[symCount] = 0;
00408 retSymB[symCount] = 0;
00409 }
00410 else
00411 {
00412 retSymA[0] = retSymB[0] = 0;
00413 }
00414
00415 if (dir < 0)
00416 {
00417 reverseBytes(aStart, aSize);
00418 reverseBytes(bStart, bSize);
00419 }
00420
00421
00422 lmCleanup(&lm);
00423 if (retStartA != NULL) *retStartA = aBestPos;
00424 if (retStartB != NULL) *retStartB = bBestPos;
00425 *retSymCount = symCount;
00426 return didExt;
00427 }
00428
00429 struct ffAli *bandExtFf(
00430 struct lm *lm,
00431 struct axtScoreScheme *ss,
00432 int maxInsert,
00433 struct ffAli *origFf,
00434 char *nStart, char *nEnd,
00435 char *hStart, char *hEnd,
00436 int dir,
00437 int maxExt)
00438
00439
00440 {
00441 int symAlloc = 2*maxExt;
00442 char *symBuf = needMem(4*maxExt);
00443 char *nBuf = symBuf;
00444 char *hBuf = symBuf + symAlloc;
00445 char *ns, *hs;
00446 int symCount, nExt, hExt;
00447 int nSize, hSize;
00448 struct ffAli *ffList = NULL;
00449 boolean gotExt;
00450
00451 if (dir > 0)
00452 {
00453 nSize = nEnd - origFf->nEnd;
00454 hSize = hEnd - origFf->hEnd;
00455 if (nSize > maxExt) nSize = maxExt;
00456 if (hSize > maxExt) hSize = maxExt;
00457 ns = origFf->nEnd;
00458 hs = origFf->hEnd;
00459 }
00460 else
00461 {
00462 nSize = origFf->nStart - nStart;
00463 hSize = origFf->hStart - hStart;
00464 if (nSize > maxExt) nSize = maxExt;
00465 if (hSize > maxExt) hSize = maxExt;
00466 ns = origFf->nStart - nSize;
00467 hs = origFf->hStart - hSize;
00468 }
00469
00470 gotExt = bandExt(FALSE, ss, maxInsert, ns, nSize, hs, hSize, dir,
00471 symAlloc, &symCount, nBuf, hBuf, &nExt, &hExt);
00472 if (gotExt)
00473 {
00474 char *nExtStart, *hExtStart;
00475 if (dir > 0)
00476 {
00477 nExtStart = ns;
00478 hExtStart = hs;
00479 }
00480 else
00481 {
00482 nExtStart = origFf->nStart - nExt - 1;
00483 hExtStart = origFf->hStart - hExt - 1;
00484 }
00485 ffList = ffAliFromSym(symCount, nBuf, hBuf, lm, nExtStart, hExtStart);
00486 }
00487 freeMem(symBuf);
00488 return ffList;
00489 }
00490