#include "common.h"#include "dnaseq.h"#include "axt.h"#include "fuzzyFind.h"#include "localmem.h"#include "bandExt.h"Include dependency graph for bandExt.c:

Go to the source code of this file.
Data Structures | |
| struct | score |
Defines | |
| #define | mpMatch 1 |
| #define | mpUp 2 |
| #define | mpLeft 3 |
| #define | mpMask 3 |
| #define | upExt (1<<2) |
| #define | lpExt (1<<3) |
Functions | |
| boolean | bandExt (boolean global, struct axtScoreScheme *ss, int maxInsert, char *aStart, int aSize, char *bStart, int bSize, int dir, int symAlloc, int *retSymCount, char *retSymA, char *retSymB, int *retStartA, int *retStartB) |
| ffAli * | bandExtFf (struct lm *lm, struct axtScoreScheme *ss, int maxInsert, struct ffAli *origFf, char *nStart, char *nEnd, char *hStart, char *hEnd, int dir, int maxExt) |
Variables | |
| static char const | rcsid [] = "$Id: bandExt.c,v 1.13 2006/03/14 19:02:31 angie Exp $" |
| boolean bandExt | ( | boolean | global, | |
| struct axtScoreScheme * | ss, | |||
| int | maxInsert, | |||
| char * | aStart, | |||
| int | aSize, | |||
| char * | bStart, | |||
| int | bSize, | |||
| int | dir, | |||
| int | symAlloc, | |||
| int * | retSymCount, | |||
| char * | retSymA, | |||
| char * | retSymB, | |||
| int * | retStartA, | |||
| int * | retStartB | |||
| ) |
Definition at line 45 of file bandExt.c.
References errAbort(), FALSE, faWriteNext(), score::left, lm, lmAllocArray, lmCleanup(), lmInit(), lpExt, score::match, mpLeft, mpMask, mpMatch, mpUp, mustWrite(), reverseBytes(), ss, TRUE, UBYTE, uglyf, uglyOut, score::up, and upExt.
Referenced by bandExtFf(), hardRefineSplice(), and smoothOneGap().
00058 { 00059 int i; /* A humble index or two. */ 00060 int *bOffsets = NULL; /* Offset of top of band. */ 00061 UBYTE **parents = NULL; /* Array of parent positions. */ 00062 struct score *curScores = NULL; /* Scores for current column. */ 00063 struct score *prevScores = NULL; /* Scores for previous column. */ 00064 struct score *swapScores; /* Helps to swap cur & prev column. */ 00065 int bandSize = 2*maxInsert + 1; /* Size of band including middle */ 00066 int maxIns1 = maxInsert + 1; /* Max insert plus one. */ 00067 int bandPlus = bandSize + 2*maxIns1; /* Band plus sentinels on either end. */ 00068 int bestScore = 0; /* Best score so far. */ 00069 int aPos, aBestPos = -1; /* Current and best position in a. */ 00070 int bPos, bBestPos = -1; /* Current and best position in b. */ 00071 int bandCenter = 0; /* b Coordinate of current band center. */ 00072 int colShift = 1; /* Vertical shift between this column and previous. */ 00073 int prevScoreOffset; /* Offset into previous score column. */ 00074 int curScoreOffset; /* Offset into current score column. */ 00075 int symCount = 0; /* Size of alignment and size allocated for it. */ 00076 int gapOpen = ss->gapOpen; /* First base in gap costs this. */ 00077 int gapExtend = ss->gapExtend; /* Extra bases in gap cost this. */ 00078 int badScore = -gapOpen*100; /* A score bad enough no one will want to link with us. */ 00079 int maxDrop = gapOpen + gapExtend*maxInsert; /* Max score drop allowed before giving up. */ 00080 int midScoreOff; /* Offset to middle of scoring array. */ 00081 struct lm *lm; /* Local memory pool. */ 00082 boolean didExt = FALSE; 00083 int initGapScore = -gapOpen; 00084 00085 /* For reverse direction just reverse bytes here and there. It's 00086 * a lot easier than the alternative and doesn't cost much time in 00087 * the global scheme of things. */ 00088 if (dir < 0) 00089 { 00090 reverseBytes(aStart, aSize); 00091 reverseBytes(bStart, bSize); 00092 } 00093 #ifdef DEBUG 00094 uglyf("bandExt: dir %d, aStart %d, aSize %d, symAlloc %d\n", dir, 00095 aSize, bSize, symAlloc); 00096 mustWrite(uglyOut, aStart, aSize); 00097 uglyf("\n"); 00098 mustWrite(uglyOut, bStart, bSize); 00099 uglyf("\n"); 00100 #endif /* DEBUG */ 00101 00102 /* Make up a local mem structure big enough for everything. 00103 * This is just a minor, perhaps misguided speed tweak to 00104 * avoid multiple mallocs. */ 00105 lm = lmInit( 00106 sizeof(bOffsets[0])*aSize + 00107 sizeof(parents[0])*bandSize + 00108 bandSize*(sizeof(parents[0][0])*aSize) + 00109 sizeof(curScores[0]) * bandPlus + 00110 sizeof(prevScores[0]) * bandPlus ); 00111 00112 /* Allocate data structures out of local memory pool. */ 00113 lmAllocArray(lm, bOffsets, aSize); 00114 lmAllocArray(lm, parents, bandSize); 00115 for (i=0; i<bandSize; ++i) 00116 lmAllocArray(lm, parents[i], aSize); 00117 lmAllocArray(lm, curScores, bandPlus); 00118 lmAllocArray(lm, prevScores, bandPlus); 00119 00120 /* Set up scoring arrays so that stuff outside of the band boundary 00121 * looks bad. There will be maxIns+1 of these sentinel values at 00122 * the start and at the beginning. */ 00123 for (i=0; i<bandPlus; ++i) 00124 { 00125 struct score *cs = &curScores[i]; 00126 cs->match = cs->up = cs->left = badScore; 00127 cs = &prevScores[i]; 00128 cs->match = cs->up = cs->left = badScore; 00129 } 00130 00131 /* Set up scoring array so that extending without an initial insert 00132 * looks relatively good. */ 00133 midScoreOff = 1 + 2 * maxInsert; 00134 prevScores[midScoreOff].match = 0; 00135 00136 /* Set up scoring array so that initial inserts of up to maxInsert 00137 * are allowed but penalized appropriately. */ 00138 { 00139 int score = -gapOpen; 00140 for (i=0; i<maxInsert; ++i) 00141 { 00142 prevScores[midScoreOff+i].up = score; 00143 score -= gapExtend; 00144 } 00145 } 00146 00147 for (aPos=0; aPos < aSize; ++aPos) 00148 { 00149 char aBase = aStart[aPos]; 00150 int *matRow = ss->matrix[(int)aBase]; 00151 int bestColScore = badScore; 00152 int bestColPos = -1; 00153 int colTop = bandCenter - maxInsert; 00154 int colBottom = bandCenter + maxIns1; 00155 00156 if (colTop < 0) colTop = 0; 00157 if (colBottom > bSize) colBottom = bSize; 00158 curScoreOffset = maxIns1 + colTop - (bandCenter - maxInsert); 00159 prevScoreOffset = curScoreOffset + colShift; 00160 #ifdef DEBUG 00161 uglyf("curScoreOffset %d, maxIns %d, prevScoreOffset %d\n", curScoreOffset, maxInsert, prevScoreOffset); 00162 #endif /* DEBUG */ 00163 00164 /* At top we deal with possibility of initial insert that 00165 * comes in at this point, unless the band has wandered off. */ 00166 if (aPos < maxInsert) 00167 { 00168 curScores[curScoreOffset-1].up = initGapScore; 00169 initGapScore -= gapExtend; 00170 } 00171 else 00172 curScores[curScoreOffset-1].up = badScore; 00173 00174 for (bPos = colTop; bPos < colBottom; ++bPos) 00175 { 00176 UBYTE parent; 00177 struct score *curScore = &curScores[curScoreOffset]; 00178 00179 #ifdef DEBUG 00180 uglyf("aPos %d, bPos %d, %c vs %c\n", aPos, bPos, aBase, bStart[bPos]); 00181 uglyf(" diag[%d %d %d], left[%d %d %d], up[%d %d %d]\n", 00182 prevScores[prevScoreOffset-1].match, 00183 prevScores[prevScoreOffset-1].left, 00184 prevScores[prevScoreOffset-1].up, 00185 prevScores[prevScoreOffset].match, 00186 prevScores[prevScoreOffset].left, 00187 prevScores[prevScoreOffset].up, 00188 curScores[curScoreOffset-1].match, 00189 curScores[curScoreOffset-1].left, 00190 curScores[curScoreOffset-1].up); 00191 #endif /* DEBUG */ 00192 00193 /* Handle ways into the matching state and record if it's 00194 * best score so far. */ 00195 { 00196 int match = matRow[(int)bStart[bPos]]; 00197 struct score *a = &prevScores[prevScoreOffset-1]; 00198 int diagScore = a->match; 00199 int leftScore = a->left; 00200 int upScore = a->up; 00201 int score; 00202 if (diagScore >= leftScore && diagScore >= upScore) 00203 { 00204 score = diagScore + match; 00205 parent = mpMatch; 00206 } 00207 else if (leftScore > upScore) 00208 { 00209 score = leftScore + match; 00210 parent = mpLeft; 00211 } 00212 else 00213 { 00214 score = upScore + match; 00215 parent = mpUp; 00216 } 00217 curScore->match = score; 00218 if (score > bestColScore) 00219 { 00220 bestColScore = score; 00221 bestColPos = bPos; 00222 } 00223 } 00224 00225 /* Handle ways into left gap state. */ 00226 { 00227 struct score *a = &prevScores[prevScoreOffset]; 00228 int extScore = a->left - gapExtend; 00229 int openScore = a->match - gapOpen; 00230 if (extScore >= openScore) 00231 { 00232 parent |= lpExt; 00233 curScore->left = extScore; 00234 } 00235 else 00236 { 00237 curScore->left = openScore; 00238 } 00239 } 00240 00241 /* Handle ways into up gap state. */ 00242 { 00243 struct score *a = &curScore[-1]; 00244 int extScore = a->up - gapExtend; 00245 int openScore = a->match - gapOpen; 00246 if (extScore >= openScore) 00247 { 00248 parent |= upExt; 00249 curScore->up = extScore; 00250 } 00251 else 00252 { 00253 curScore->up = openScore; 00254 } 00255 } 00256 00257 parents[curScoreOffset - maxIns1][aPos] = parent; 00258 00259 #ifdef DEBUG 00260 uglyf(" cur [%d %d %d]\n", 00261 curScore->match, 00262 curScore->left, 00263 curScore->up); 00264 #endif /* DEBUG */ 00265 /* Advance to next row in column. */ 00266 curScoreOffset += 1; 00267 prevScoreOffset += 1; 00268 } 00269 00270 00271 /* If this column's score is best so far make note of 00272 * it and shift things so that the matching bases at the 00273 * best score are centered in the band. This allows the 00274 * band to wander where appropriate. Having the band shift 00275 * to the best column position if it is not the best score 00276 * so far doesn't work so well though. */ 00277 if (bestScore < bestColScore) 00278 { 00279 bestScore = bestColScore; 00280 aBestPos = aPos; 00281 bBestPos = bestColPos; 00282 colShift = (bestColPos + 1) - bandCenter; 00283 } 00284 else if (bestColScore < bestScore - maxDrop) 00285 { 00286 if (!global) 00287 break; 00288 } 00289 else 00290 { 00291 colShift = 1; 00292 } 00293 00294 /* Keep track of vertical offset of this column, and 00295 * set up offset of next column. */ 00296 bOffsets[aPos] = bandCenter; 00297 bandCenter += colShift; 00298 00299 /* Swap scoring arrays so current becomes next iteration's previous. */ 00300 swapScores = curScores; 00301 curScores = prevScores; 00302 prevScores = swapScores; 00303 } 00304 00305 00306 /* Trace back. */ 00307 #ifdef DEBUG 00308 uglyf("traceback: bestScore %d, aBestPos %d, bBestPos %d\n", bestScore, aBestPos, bBestPos); 00309 #endif /* DEBUG */ 00310 if (global || bestScore > 0) 00311 { 00312 boolean upState = FALSE; 00313 boolean leftState = FALSE; 00314 didExt = TRUE; 00315 if (global) 00316 { 00317 aPos = aSize-1; 00318 bPos = bSize-1; 00319 } 00320 else 00321 { 00322 aPos = aBestPos; 00323 bPos = bBestPos; 00324 } 00325 for (;;) 00326 { 00327 int pOffset; 00328 UBYTE parent; 00329 pOffset = bPos - bOffsets[aPos] + maxInsert; 00330 if (pOffset < 0) pOffset = 0; 00331 // FIXME: there may be some cases near beginning where 00332 // pOffset is not right. Clamping it at 0 prevents a crash 00333 // and produces correct behavior in many cases. However 00334 // I'm thinking a better fix would be to have bOffsets follow 00335 // colTop rather than bandCenter somehow. Exactly how is 00336 // beyond me at the moment. 00337 if (pOffset >= bandSize) 00338 { 00339 #ifdef DEBUG 00340 uglyf("bPos %d, aPos %d, bOffsets[aPos] %d, maxInsert %d\n", bPos, aPos, bOffsets[aPos], maxInsert); 00341 uglyf("pOffset = %d, bandSize = %d\n", pOffset, bandSize); 00342 uglyf("aSize %d, bSize %d\n", aSize, bSize); 00343 faWriteNext(uglyOut, "uglyA", aStart, aSize); 00344 faWriteNext(uglyOut, "uglyB", bStart, bSize); 00345 #endif /* DEBUG */ 00346 assert(global); 00347 return FALSE; 00348 } 00349 parent = parents[pOffset][aPos]; 00350 #ifdef DEBUG 00351 uglyf("aPos %d, bPos %d, parent %d, pMask %d upState %d, leftState %d\n", aPos, bPos, parent, parent & mpMask, upState, leftState); 00352 #endif /* DEBUG */ 00353 if (upState) 00354 { 00355 retSymA[symCount] = '-'; 00356 retSymB[symCount] = bStart[bPos]; 00357 bPos -= 1; 00358 upState = (parent&upExt); 00359 } 00360 else if (leftState) 00361 { 00362 retSymA[symCount] = aStart[aPos]; 00363 retSymB[symCount] = '-'; 00364 aPos -= 1; 00365 leftState = (parent&lpExt); 00366 } 00367 else 00368 { 00369 retSymA[symCount] = aStart[aPos]; 00370 retSymB[symCount] = bStart[bPos]; 00371 aPos -= 1; 00372 bPos -= 1; 00373 parent &= mpMask; 00374 if (parent == mpUp) 00375 upState = TRUE; 00376 else if (parent == mpLeft) 00377 leftState = TRUE; 00378 } 00379 if (++symCount >= symAlloc) 00380 errAbort("unsufficient symAlloc in bandExt"); 00381 if (aPos < 0 || bPos < 0) 00382 { 00383 while (aPos >= 0) 00384 { 00385 retSymA[symCount] = aStart[aPos]; 00386 retSymB[symCount] = '-'; 00387 if (++symCount >= symAlloc) 00388 errAbort("unsufficient symAlloc in bandExt"); 00389 --aPos; 00390 } 00391 while (bPos >= 0) 00392 { 00393 retSymA[symCount] = '-'; 00394 retSymB[symCount] = bStart[bPos]; 00395 if (++symCount >= symAlloc) 00396 errAbort("unsufficient symAlloc in bandExt"); 00397 --bPos; 00398 } 00399 break; 00400 } 00401 } 00402 if (dir > 0) 00403 { 00404 reverseBytes(retSymA, symCount); 00405 reverseBytes(retSymB, symCount); 00406 } 00407 retSymA[symCount] = 0; 00408 retSymB[symCount] = 0; 00409 } 00410 else 00411 { 00412 retSymA[0] = retSymB[0] = 0; 00413 } 00414 00415 if (dir < 0) 00416 { 00417 reverseBytes(aStart, aSize); 00418 reverseBytes(bStart, bSize); 00419 } 00420 00421 /* Clean up, set return values and go home */ 00422 lmCleanup(&lm); 00423 if (retStartA != NULL) *retStartA = aBestPos; 00424 if (retStartB != NULL) *retStartB = bBestPos; 00425 *retSymCount = symCount; 00426 return didExt; 00427 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct ffAli* bandExtFf | ( | struct lm * | lm, | |
| struct axtScoreScheme * | ss, | |||
| int | maxInsert, | |||
| struct ffAli * | origFf, | |||
| char * | nStart, | |||
| char * | nEnd, | |||
| char * | hStart, | |||
| char * | hEnd, | |||
| int | dir, | |||
| int | maxExt | |||
| ) | [read] |
Definition at line 429 of file bandExt.c.
References bandExt(), FALSE, ffAliFromSym(), freeMem(), ffAli::hEnd, ffAli::hStart, lm, needMem(), ffAli::nEnd, ffAli::nStart, and ss.
Referenced by bandExtAfter(), and bandExtBefore().
00440 { 00441 int symAlloc = 2*maxExt; 00442 char *symBuf = needMem(4*maxExt); 00443 char *nBuf = symBuf; 00444 char *hBuf = symBuf + symAlloc; 00445 char *ns, *hs; 00446 int symCount, nExt, hExt; 00447 int nSize, hSize; 00448 struct ffAli *ffList = NULL; 00449 boolean gotExt; 00450 00451 if (dir > 0) 00452 { 00453 nSize = nEnd - origFf->nEnd; 00454 hSize = hEnd - origFf->hEnd; 00455 if (nSize > maxExt) nSize = maxExt; 00456 if (hSize > maxExt) hSize = maxExt; 00457 ns = origFf->nEnd; 00458 hs = origFf->hEnd; 00459 } 00460 else 00461 { 00462 nSize = origFf->nStart - nStart; 00463 hSize = origFf->hStart - hStart; 00464 if (nSize > maxExt) nSize = maxExt; 00465 if (hSize > maxExt) hSize = maxExt; 00466 ns = origFf->nStart - nSize; 00467 hs = origFf->hStart - hSize; 00468 } 00469 00470 gotExt = bandExt(FALSE, ss, maxInsert, ns, nSize, hs, hSize, dir, 00471 symAlloc, &symCount, nBuf, hBuf, &nExt, &hExt); 00472 if (gotExt) 00473 { 00474 char *nExtStart, *hExtStart; 00475 if (dir > 0) 00476 { 00477 nExtStart = ns; 00478 hExtStart = hs; 00479 } 00480 else 00481 { 00482 nExtStart = origFf->nStart - nExt - 1; 00483 hExtStart = origFf->hStart - hExt - 1; 00484 } 00485 ffList = ffAliFromSym(symCount, nBuf, hBuf, lm, nExtStart, hExtStart); 00486 } 00487 freeMem(symBuf); 00488 return ffList; 00489 }
Here is the call graph for this function:

Here is the caller graph for this function:

char const rcsid[] = "$Id: bandExt.c,v 1.13 2006/03/14 19:02:31 angie Exp $" [static] |
1.5.2