inc/bandExt.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

boolean bandExt (boolean global, struct axtScoreScheme *ss, int maxInsert, char *aStart, int aSize, char *bStart, int bSize, int dir, int symAlloc, int *retSymCount, char *retSymA, char *retSymB, int *retRevStartA, int *retRevStartB)
ffAlibandExtFf (struct lm *lm, struct axtScoreScheme *ss, int maxInsert, struct ffAli *origFf, char *nStart, char *nEnd, char *hStart, char *hEnd, int dir, int maxExt)


Function Documentation

boolean bandExt ( boolean  global,
struct axtScoreScheme ss,
int  maxInsert,
char *  aStart,
int  aSize,
char *  bStart,
int  bSize,
int  dir,
int  symAlloc,
int *  retSymCount,
char *  retSymA,
char *  retSymB,
int *  retRevStartA,
int *  retRevStartB 
)

Definition at line 45 of file bandExt.c.

References errAbort(), FALSE, faWriteNext(), score::left, lm, lmAllocArray, lmCleanup(), lmInit(), lpExt, score::match, mpLeft, mpMask, mpMatch, mpUp, mustWrite(), reverseBytes(), ss, TRUE, UBYTE, uglyf, uglyOut, score::up, and upExt.

Referenced by bandExtFf(), hardRefineSplice(), and smoothOneGap().

00058 {
00059 int i;                  /* A humble index or two. */
00060 int *bOffsets = NULL;   /* Offset of top of band. */
00061 UBYTE **parents = NULL; /* Array of parent positions. */
00062 struct score *curScores = NULL; /* Scores for current column. */
00063 struct score *prevScores = NULL;        /* Scores for previous column. */
00064 struct score *swapScores;       /* Helps to swap cur & prev column. */
00065 int bandSize = 2*maxInsert + 1;  /* Size of band including middle */
00066 int maxIns1 = maxInsert + 1;     /* Max insert plus one. */
00067 int bandPlus = bandSize + 2*maxIns1;  /* Band plus sentinels on either end. */
00068 int bestScore = 0;               /* Best score so far. */
00069 int aPos, aBestPos = -1;         /* Current and best position in a. */
00070 int bPos, bBestPos = -1;         /* Current and best position in b. */
00071 int bandCenter = 0;     /* b Coordinate of current band center. */
00072 int colShift = 1;       /* Vertical shift between this column and previous. */
00073 int prevScoreOffset;    /* Offset into previous score column. */
00074 int curScoreOffset;     /* Offset into current score column. */
00075 int symCount = 0;       /* Size of alignment and size allocated for it. */
00076 int gapOpen = ss->gapOpen;       /* First base in gap costs this. */
00077 int gapExtend = ss->gapExtend;   /* Extra bases in gap cost this. */
00078 int badScore = -gapOpen*100;     /* A score bad enough no one will want to link with us. */
00079 int maxDrop = gapOpen + gapExtend*maxInsert; /* Max score drop allowed before giving up. */
00080 int midScoreOff;                 /* Offset to middle of scoring array. */
00081 struct lm *lm;                   /* Local memory pool. */
00082 boolean didExt = FALSE;
00083 int initGapScore = -gapOpen;
00084 
00085 /* For reverse direction just reverse bytes here and there.  It's
00086  * a lot easier than the alternative and doesn't cost much time in
00087  * the global scheme of things. */
00088 if (dir < 0)
00089     {
00090     reverseBytes(aStart, aSize);
00091     reverseBytes(bStart, bSize);
00092     }
00093 #ifdef DEBUG
00094 uglyf("bandExt: dir %d, aStart %d, aSize %d, symAlloc %d\n", dir,
00095         aSize, bSize, symAlloc);
00096 mustWrite(uglyOut, aStart, aSize);
00097 uglyf("\n");
00098 mustWrite(uglyOut, bStart, bSize);
00099 uglyf("\n");
00100 #endif /* DEBUG */
00101 
00102 /* Make up a local mem structure big enough for everything. 
00103  * This is just a minor, perhaps misguided speed tweak to
00104  * avoid multiple mallocs. */
00105 lm = lmInit(
00106     sizeof(bOffsets[0])*aSize +
00107     sizeof(parents[0])*bandSize +
00108     bandSize*(sizeof(parents[0][0])*aSize) +
00109     sizeof(curScores[0]) * bandPlus +
00110     sizeof(prevScores[0]) * bandPlus );
00111 
00112 /* Allocate data structures out of local memory pool. */
00113 lmAllocArray(lm, bOffsets, aSize);
00114 lmAllocArray(lm, parents, bandSize);
00115 for (i=0; i<bandSize; ++i)
00116     lmAllocArray(lm, parents[i], aSize);
00117 lmAllocArray(lm, curScores, bandPlus);
00118 lmAllocArray(lm, prevScores, bandPlus);
00119 
00120 /* Set up scoring arrays so that stuff outside of the band boundary 
00121  * looks bad.  There will be maxIns+1 of these sentinel values at
00122  * the start and at the beginning. */
00123 for (i=0; i<bandPlus; ++i)
00124     {
00125     struct score *cs = &curScores[i];
00126     cs->match = cs->up = cs->left = badScore;
00127     cs = &prevScores[i];
00128     cs->match = cs->up = cs->left = badScore;
00129     }
00130 
00131 /* Set up scoring array so that extending without an initial insert
00132  * looks relatively good. */
00133 midScoreOff = 1 + 2 * maxInsert;
00134 prevScores[midScoreOff].match = 0;
00135 
00136 /* Set up scoring array so that initial inserts of up to maxInsert
00137  * are allowed but penalized appropriately. */
00138     {
00139     int score = -gapOpen;
00140     for (i=0; i<maxInsert; ++i)
00141         {
00142         prevScores[midScoreOff+i].up = score;
00143         score -= gapExtend;
00144         }
00145     }
00146 
00147 for (aPos=0; aPos < aSize; ++aPos)
00148     {
00149     char aBase = aStart[aPos];
00150     int *matRow = ss->matrix[(int)aBase];
00151     int bestColScore = badScore;
00152     int bestColPos = -1;
00153     int colTop = bandCenter - maxInsert;
00154     int colBottom = bandCenter + maxIns1;
00155 
00156     if (colTop < 0) colTop = 0;
00157     if (colBottom > bSize) colBottom = bSize;
00158     curScoreOffset =  maxIns1 + colTop - (bandCenter - maxInsert);
00159     prevScoreOffset = curScoreOffset + colShift;
00160 #ifdef DEBUG
00161     uglyf("curScoreOffset %d, maxIns %d, prevScoreOffset %d\n", curScoreOffset, maxInsert, prevScoreOffset);
00162 #endif /* DEBUG */
00163 
00164     /* At top we deal with possibility of initial insert that
00165      * comes in at this point, unless the band has wandered off. */
00166     if (aPos < maxInsert)
00167         {
00168         curScores[curScoreOffset-1].up = initGapScore;
00169         initGapScore -= gapExtend;
00170         }
00171     else
00172         curScores[curScoreOffset-1].up = badScore;
00173 
00174     for (bPos = colTop; bPos < colBottom; ++bPos)
00175         {
00176         UBYTE parent;
00177         struct score *curScore = &curScores[curScoreOffset];
00178 
00179 #ifdef DEBUG
00180         uglyf("aPos %d, bPos %d, %c vs %c\n", aPos, bPos, aBase, bStart[bPos]);
00181         uglyf("  diag[%d %d %d], left[%d %d %d], up[%d %d %d]\n", 
00182                 prevScores[prevScoreOffset-1].match, 
00183                 prevScores[prevScoreOffset-1].left, 
00184                 prevScores[prevScoreOffset-1].up, 
00185                 prevScores[prevScoreOffset].match, 
00186                 prevScores[prevScoreOffset].left, 
00187                 prevScores[prevScoreOffset].up, 
00188                 curScores[curScoreOffset-1].match, 
00189                 curScores[curScoreOffset-1].left, 
00190                 curScores[curScoreOffset-1].up);
00191 #endif /* DEBUG */
00192 
00193         /* Handle ways into the matching state and record if it's
00194          * best score so far. */
00195             {
00196             int match = matRow[(int)bStart[bPos]];
00197             struct score *a = &prevScores[prevScoreOffset-1];
00198             int diagScore = a->match;
00199             int leftScore = a->left;
00200             int upScore = a->up;
00201             int score;
00202             if (diagScore >= leftScore && diagScore >= upScore)
00203                 {
00204                 score = diagScore + match;
00205                 parent = mpMatch;
00206                 }
00207             else if (leftScore > upScore)
00208                 {
00209                 score = leftScore + match;
00210                 parent = mpLeft;
00211                 }
00212             else
00213                 {
00214                 score = upScore + match;
00215                 parent = mpUp;
00216                 }
00217             curScore->match = score;
00218             if (score > bestColScore)
00219                 {
00220                 bestColScore = score;
00221                 bestColPos = bPos;
00222                 }
00223             }
00224 
00225         /* Handle ways into left gap state. */
00226             {
00227             struct score *a = &prevScores[prevScoreOffset];
00228             int extScore = a->left - gapExtend;
00229             int openScore = a->match - gapOpen;
00230             if (extScore >= openScore)
00231                 {
00232                 parent |= lpExt;
00233                 curScore->left = extScore;
00234                 }
00235             else
00236                 {
00237                 curScore->left = openScore;
00238                 }
00239             }
00240 
00241         /* Handle ways into up gap state. */
00242             {
00243             struct score *a = &curScore[-1];
00244             int extScore = a->up - gapExtend;
00245             int openScore = a->match - gapOpen;
00246             if (extScore >= openScore)
00247                 {
00248                 parent |= upExt;
00249                 curScore->up = extScore;
00250                 }
00251             else
00252                 {
00253                 curScore->up = openScore;
00254                 }
00255             }
00256 
00257         parents[curScoreOffset - maxIns1][aPos] = parent;
00258 
00259 #ifdef DEBUG
00260         uglyf(" cur [%d %d %d]\n", 
00261                 curScore->match, 
00262                 curScore->left, 
00263                 curScore->up);
00264 #endif /* DEBUG */
00265         /* Advance to next row in column. */
00266         curScoreOffset += 1;
00267         prevScoreOffset += 1;
00268         }
00269 
00270 
00271     /* If this column's score is best so far make note of
00272      * it and shift things so that the matching bases at the
00273      * best score are centered in the band.  This allows the
00274      * band to wander where appropriate.  Having the band shift
00275      * to the best column position if it is not the best score
00276      * so far doesn't work so well though.  */
00277     if (bestScore < bestColScore)
00278         {
00279         bestScore = bestColScore;
00280         aBestPos = aPos;
00281         bBestPos = bestColPos;
00282         colShift = (bestColPos + 1) - bandCenter;
00283         }
00284     else if (bestColScore < bestScore - maxDrop)
00285         {
00286         if (!global)
00287             break;
00288         }
00289     else
00290         {
00291         colShift = 1;
00292         }
00293 
00294     /* Keep track of vertical offset of this column, and
00295      * set up offset of next column. */
00296     bOffsets[aPos] = bandCenter;
00297     bandCenter += colShift;
00298 
00299     /* Swap scoring arrays so current becomes next iteration's previous. */
00300     swapScores = curScores;
00301     curScores = prevScores;
00302     prevScores = swapScores;
00303     }
00304 
00305 
00306 /* Trace back. */
00307 #ifdef DEBUG
00308 uglyf("traceback: bestScore %d, aBestPos %d, bBestPos %d\n", bestScore, aBestPos, bBestPos);
00309 #endif /* DEBUG */
00310 if (global || bestScore > 0)
00311     {
00312     boolean upState = FALSE;
00313     boolean leftState = FALSE;
00314     didExt = TRUE;
00315     if (global)
00316         {
00317         aPos = aSize-1;
00318         bPos = bSize-1;
00319         }
00320     else
00321         {
00322         aPos = aBestPos;
00323         bPos = bBestPos;
00324         }
00325     for (;;)
00326         {
00327         int pOffset;
00328         UBYTE parent;
00329         pOffset = bPos - bOffsets[aPos] + maxInsert;
00330         if (pOffset < 0) pOffset = 0;
00331         // FIXME: there may be some cases near beginning where
00332         // pOffset is not right.  Clamping it at 0 prevents a crash
00333         // and produces correct behavior in many cases.  However
00334         // I'm thinking a better fix would be to have bOffsets follow
00335         // colTop rather than bandCenter somehow. Exactly how is
00336         // beyond me at the moment.
00337         if (pOffset >= bandSize)
00338             {
00339 #ifdef DEBUG
00340             uglyf("bPos %d, aPos %d, bOffsets[aPos] %d, maxInsert %d\n", bPos, aPos, bOffsets[aPos], maxInsert);
00341             uglyf("pOffset = %d, bandSize = %d\n", pOffset, bandSize);
00342             uglyf("aSize %d, bSize %d\n", aSize, bSize);
00343             faWriteNext(uglyOut, "uglyA", aStart, aSize);
00344             faWriteNext(uglyOut, "uglyB", bStart, bSize);
00345 #endif /* DEBUG */
00346             assert(global);
00347             return FALSE;
00348             }
00349         parent = parents[pOffset][aPos];
00350 #ifdef DEBUG
00351         uglyf("aPos %d, bPos %d, parent %d, pMask %d upState %d, leftState %d\n", aPos, bPos, parent, parent & mpMask, upState, leftState);
00352 #endif /* DEBUG */
00353         if (upState)
00354             {
00355             retSymA[symCount] = '-';
00356             retSymB[symCount] = bStart[bPos];
00357             bPos -= 1;
00358             upState = (parent&upExt);
00359             }
00360         else if (leftState)
00361             {
00362             retSymA[symCount] = aStart[aPos];
00363             retSymB[symCount] = '-';
00364             aPos -= 1;
00365             leftState = (parent&lpExt);
00366             }
00367         else
00368             {
00369             retSymA[symCount] = aStart[aPos];
00370             retSymB[symCount] = bStart[bPos];
00371             aPos -= 1;
00372             bPos -= 1;
00373             parent &= mpMask;
00374             if (parent == mpUp)
00375                 upState = TRUE;
00376             else if (parent == mpLeft)
00377                 leftState = TRUE;
00378             }
00379         if (++symCount >= symAlloc)
00380             errAbort("unsufficient symAlloc in bandExt");
00381         if (aPos < 0 || bPos < 0)
00382             {
00383             while (aPos >= 0)
00384                 {
00385                 retSymA[symCount] = aStart[aPos];
00386                 retSymB[symCount] = '-';
00387                 if (++symCount >= symAlloc)
00388                     errAbort("unsufficient symAlloc in bandExt");
00389                 --aPos;
00390                 }
00391             while (bPos >= 0)
00392                 {
00393                 retSymA[symCount] = '-';
00394                 retSymB[symCount] = bStart[bPos];
00395                 if (++symCount >= symAlloc)
00396                     errAbort("unsufficient symAlloc in bandExt");
00397                 --bPos;
00398                 }
00399             break;
00400             }
00401         }
00402     if (dir > 0)
00403         {
00404         reverseBytes(retSymA, symCount);
00405         reverseBytes(retSymB, symCount);
00406         }
00407     retSymA[symCount] = 0;
00408     retSymB[symCount] = 0;
00409     }
00410 else
00411     {
00412     retSymA[0] = retSymB[0] = 0;
00413     }
00414 
00415 if (dir < 0)
00416     {
00417     reverseBytes(aStart, aSize);
00418     reverseBytes(bStart, bSize);
00419     }
00420 
00421 /* Clean up, set return values and go home */
00422 lmCleanup(&lm);
00423 if (retStartA != NULL) *retStartA = aBestPos;
00424 if (retStartB != NULL) *retStartB = bBestPos;
00425 *retSymCount = symCount;
00426 return didExt;
00427 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct ffAli* bandExtFf ( struct lm lm,
struct axtScoreScheme ss,
int  maxInsert,
struct ffAli origFf,
char *  nStart,
char *  nEnd,
char *  hStart,
char *  hEnd,
int  dir,
int  maxExt 
) [read]

Definition at line 429 of file bandExt.c.

References bandExt(), FALSE, ffAliFromSym(), freeMem(), ffAli::hEnd, ffAli::hStart, lm, needMem(), ffAli::nEnd, ffAli::nStart, and ss.

Referenced by bandExtAfter(), and bandExtBefore().

00440 {
00441 int symAlloc = 2*maxExt;
00442 char *symBuf = needMem(4*maxExt);
00443 char *nBuf = symBuf;
00444 char *hBuf = symBuf + symAlloc;
00445 char *ns, *hs;
00446 int symCount, nExt, hExt;
00447 int nSize, hSize;
00448 struct ffAli *ffList = NULL;
00449 boolean gotExt;
00450 
00451 if (dir > 0)
00452     {
00453     nSize = nEnd - origFf->nEnd;
00454     hSize = hEnd - origFf->hEnd;
00455     if (nSize > maxExt) nSize = maxExt;
00456     if (hSize > maxExt) hSize = maxExt;
00457     ns = origFf->nEnd;
00458     hs = origFf->hEnd;
00459     }
00460 else
00461     {
00462     nSize = origFf->nStart - nStart;
00463     hSize = origFf->hStart - hStart;
00464     if (nSize > maxExt) nSize = maxExt;
00465     if (hSize > maxExt) hSize = maxExt;
00466     ns = origFf->nStart - nSize;
00467     hs = origFf->hStart - hSize;
00468     }
00469 
00470 gotExt = bandExt(FALSE, ss, maxInsert, ns, nSize, hs, hSize, dir,
00471         symAlloc, &symCount, nBuf, hBuf, &nExt, &hExt);
00472 if (gotExt)
00473     {
00474     char *nExtStart, *hExtStart;
00475     if (dir > 0)
00476         {
00477         nExtStart = ns;
00478         hExtStart = hs;
00479         }
00480     else
00481         {
00482         nExtStart = origFf->nStart - nExt - 1;
00483         hExtStart = origFf->hStart - hExt - 1;
00484         }
00485     ffList = ffAliFromSym(symCount, nBuf, hBuf, lm,  nExtStart, hExtStart);
00486     }
00487 freeMem(symBuf);
00488 return ffList;
00489 }

Here is the call graph for this function:

Here is the caller graph for this function:


Generated on Tue Dec 25 18:41:59 2007 for blat by  doxygen 1.5.2