lib/axtAffine.c

Go to the documentation of this file.
00001 /* axtAffine - do alignment of two (shortish) sequences with
00002  * affine gap scoring, and return the result as an axt. 
00003  * This file is copyright 2000-2004 Jim Kent, but license is hereby
00004  * granted for all use - public, private or commercial. */
00005 
00006 #include "common.h"
00007 #include "pairHmm.h"
00008 #include "axt.h"
00009 
00010 static char const rcsid[] = "$Id: axtAffine.c,v 1.7 2005/04/10 14:41:20 markd Exp $";
00011 
00012 
00013 boolean axtAffineSmallEnough(double querySize, double targetSize)
00014 /* Return TRUE if it is reasonable to align sequences of given sizes
00015  * with axtAffine. */
00016 {
00017 return targetSize * querySize <= 1.0E8;
00018 }
00019 
00020 static void affineAlign(char *query, int querySize, 
00021         char *target, int targetSize, struct axtScoreScheme *ss,
00022         struct phmmMatrix **retMatrix, struct phmmAliPair **retPairList,
00023         int *retScore)
00024 /* Use dynamic programming to do alignment including affine gap
00025  * scores. */
00026 {
00027 struct phmmMatrix *a;
00028 struct phmmState *hf, *iq, *it;
00029 int qIx, tIx, sIx;  /* Query, target, and state indices */
00030 int rowOffset, newCellOffset;
00031 int bestScore = -0x4fffffff;
00032 struct phmmMommy *bestCell = NULL;
00033 int matchPair;
00034 int gapStart, gapExt;
00035 
00036 /* Check that it's not too big. */
00037 if (!axtAffineSmallEnough(querySize, targetSize))
00038     errAbort("Can't align %d x %d, too big\n", querySize, targetSize);
00039 
00040 gapStart = -ss->gapOpen;
00041 gapExt = -ss->gapExtend;
00042 
00043 /* Initialize 3 state matrix (match, query insert, target insert). */
00044 a = phmmMatrixNew(3, query, querySize, target, targetSize);
00045 hf = phmmNameState(a, 0, "match", 'M');
00046 iq = phmmNameState(a, 1, "qSlip", 'Q');
00047 it = phmmNameState(a, 2, "tSlip", 'T');
00048 
00049 for (tIx = 1; tIx < a->tDim; tIx += 1)
00050     {
00051     UBYTE mommy = 0;
00052     int score, tempScore;
00053 
00054 /* Macros to make me less mixed up when accessing scores from row arrays.*/
00055 #define matchScore lastScores[qIx-1]
00056 #define qSlipScore lastScores[qIx]
00057 #define tSlipScore scores[qIx-1]
00058 #define newScore scores[qIx]
00059 
00060 /* Start up state block (with all ways to enter state) */
00061 #define startState(state) \
00062    score = 0;
00063 
00064 /* Define a transition from state while advancing over both
00065  * target and query. */
00066 #define matchState(state, addScore) \
00067    { \
00068    if ((tempScore = state->matchScore + addScore) > score) \
00069         { \
00070         mommy = phmmPackMommy(state->stateIx, -1, -1); \
00071         score = tempScore; \
00072         } \
00073    } 
00074 
00075 /* Define a transition from state while slipping query
00076  * and advancing target. */
00077 #define qSlipState(state, addScore) \
00078    { \
00079    if ((tempScore = state->qSlipScore + addScore) > score) \
00080         { \
00081         mommy = phmmPackMommy(state->stateIx, 0, -1); \
00082         score = tempScore; \
00083         } \
00084    }
00085 
00086 /* Define a transition from state while slipping target
00087  * and advancing query. */
00088 #define tSlipState(state, addScore) \
00089    { \
00090    if ((tempScore = state->tSlipScore + addScore) > score) \
00091         { \
00092         mommy = phmmPackMommy(state->stateIx, -1, 0); \
00093         score = tempScore; \
00094         } \
00095    }
00096 
00097 /* End a block of transitions into state. */
00098 #define endState(state) \
00099     { \
00100     struct phmmMommy *newCell = state->cells + newCellOffset; \
00101     if (score <= 0) \
00102         { \
00103         mommy = phmmNullMommy; \
00104         score = 0; \
00105         } \
00106     newCell->mommy = mommy; \
00107     state->newScore = score; \
00108     if (score > bestScore) \
00109         { \
00110         bestScore = score; \
00111         bestCell = newCell; \
00112         } \
00113     } 
00114 
00115 /* End a state that you know won't produce an optimal
00116  * final score. */
00117 #define shortEndState(state) \
00118     { \
00119     struct phmmMommy *newCell = state->cells + newCellOffset; \
00120     if (score <= 0) \
00121         { \
00122         mommy = phmmNullMommy; \
00123         score = 0; \
00124         } \
00125     newCell->mommy = mommy; \
00126     state->newScore = score; \
00127     }
00128 
00129 
00130     rowOffset = tIx*a->qDim;
00131     for (qIx = 1; qIx < a->qDim; qIx += 1)
00132         {
00133         newCellOffset = rowOffset + qIx;
00134         
00135         /* Figure the cost or bonus for pairing target and query residue here. */
00136         matchPair = ss->matrix[(int)a->query[qIx-1]][(int)a->target[tIx-1]];
00137 
00138         /* Update hiFi space. */
00139             {
00140             startState(hf);
00141             matchState(hf, matchPair);
00142             matchState(iq, matchPair);
00143             matchState(it, matchPair);
00144             endState(hf);
00145             }
00146 
00147         /* Update query slip space. */
00148             {
00149             startState(iq);
00150             qSlipState(iq, gapExt);
00151             qSlipState(hf, gapStart);            
00152             qSlipState(it, gapStart);   /* Allow double gaps, T first always. */
00153             shortEndState(iq);
00154             }
00155         
00156         /* Update target slip space. */
00157             {
00158             startState(it);
00159             tSlipState(it, gapExt);
00160             tSlipState(hf, gapStart);            
00161             shortEndState(it);
00162             }
00163 
00164         }
00165     /* Swap score columns so current becomes last, and last gets
00166      * reused. */
00167     for (sIx = 0; sIx < a->stateCount; ++sIx)
00168         {
00169         struct phmmState *as = &a->states[sIx];
00170         int *swapTemp = as->lastScores;
00171         as->lastScores = as->scores;
00172         as->scores = swapTemp;
00173         }
00174     }
00175 
00176 /* Trace back from best scoring cell. */
00177 *retPairList = phmmTraceBack(a, bestCell);
00178 *retMatrix = a;
00179 *retScore = bestScore;
00180 
00181 #undef matchScore
00182 #undef qSlipScore
00183 #undef tSlipScore
00184 #undef newScore
00185 #undef startState
00186 #undef matchState
00187 #undef qSlipState
00188 #undef tSlipState
00189 #undef shortEndState
00190 #undef endState
00191 }
00192 
00193 struct axt *axtAffine(bioSeq *query, bioSeq *target, struct axtScoreScheme *ss)
00194 /* Return alignment if any of query and target using scoring scheme. */
00195 {
00196 struct axt *axt;
00197 int score;
00198 struct phmmMatrix *matrix;
00199 struct phmmAliPair *pairList;
00200 
00201 affineAlign(query->dna, query->size, target->dna, target->size, ss,
00202         &matrix, &pairList, &score);
00203 axt = phhmTraceToAxt(matrix, pairList, score, query->name, target->name);
00204 phmmMatrixFree(&matrix);
00205 slFreeList(&pairList);
00206 return axt;
00207 }
00208 
00209 
00210 /* ----- axtAffine2Level begins ----- 
00211 
00212  Written by Galt Barber, December 2004
00213  I wrote this on my own time and am donating this
00214  to the public domain.  The original concept 
00215  was Don Speck's, as described by Kevin Karplus.
00216 
00217  @article{Grice97,
00218      author = "J. A. Grice and R. Hughey and D. Speck",
00219      title = "Reduced space sequence alignment",
00220      journal = cabios,
00221      volume=13,
00222      number=1,
00223      year=1997,
00224      month=feb,
00225      pages="45-53"
00226      }
00227                                                                  
00228 
00229 */
00230 
00231 #define WORST 0xC0000000 /* WORST Score approx neg. inf. 0x80000000 overflowed, reduced by half */
00232 
00233 
00234 /* m d i notation: match, delete, insert in query */
00235 
00236 struct cell2L 
00237 {
00238 int  bestm;   /* best score array */
00239 int  bestd;                         
00240 int  besti;                          
00241 char backm;   /* back trace array */
00242 char backd;                         
00243 char backi;                          
00244 };
00245 
00246 #ifdef DEBUG
00247 void dump2L(struct cell2L* c)
00248 /* print matrix cell for debugging 
00249    I redirect output to a file 
00250    and look at it with a web browser
00251    to see the long lines
00252 */
00253 {
00254     printf("%04d%c %04d%c %04d%c   ",
00255      c->bestd, c->backd,
00256      c->bestm, c->backm,
00257      c->besti, c->backi
00258      );
00259 }     
00260 #endif
00261 
00262 void kForwardAffine(
00263 struct cell2L *cells,  /* dyn prg arr cells */
00264 int row,      /* starting row base */
00265 int rowmax,   /* ending row */
00266 int rdelta,   /* convert between real targ seq row and logical row */                             
00267 int cmost,    /* track right edge, shrink as traces back */
00268 int lv,       /* width of array including sentinel col 0 */ 
00269 char *q,      /* query and target seqs */
00270 char *t,
00271 struct axtScoreScheme *ss,  /* score scheme passed in */
00272 int *bestbestOut,  /* return best overall found, and it's row and col */
00273 int *bestrOut,
00274 int *bestcOut,
00275 char *bestdirOut
00276 )
00277 /*
00278 Calculates filling dynprg mtx forward.
00279  Called 3 times from affine2Level.
00280 
00281 row is offset into the actual best and back arrays,
00282  so rdelta serves as a conversion between
00283  the real target seq row and the logical row
00284  used in best and back arrays.
00285 
00286 cmost is a column limiter that lets us avoid
00287  unused areas of the array when doing the
00288  backtrace 2nd pass. This can be an average
00289  of half of the total array saved.
00290 
00291 */
00292 {
00293 int r=0, rr=0;
00294 int gapOpen  =ss->gapOpen;    
00295 int gapExtend=ss->gapExtend;  
00296 int doubleGap=ss->gapExtend;  // this can be gapOpen or gapExtend, or custom ?
00297 struct cell2L *cellp,*cellc;  /* current and previous row base */
00298 struct cell2L *u,*d,*l,*s;    /* up,diag,left,self pointers to hopefully speed things up */
00299 int c=0;
00300 int bestbest = *bestbestOut; /* make local copy of best best */
00301 cellc = cells+(row-1)*lv;     /* start it off one row back coming into loop */
00302 
00303 #ifdef DEBUG
00304 for(c=0;c<=cmost;c++) /* show prev row */
00305     { dump2L(cellc+c); }
00306 printf("\n");
00307 #endif
00308 
00309 for(r=row; r<=rowmax; r++)
00310     {
00311     cellp = cellc;
00312     cellc += lv;    /* initialize pointers to curr and prev rows */
00313     
00314     rr = r+rdelta;
00315 
00316     d = cellp;   /* diag is prev row, prev col */
00317     l = cellc;   /* left is curr row, prev col */
00318     u = d+1;     /*   up is prev row, curr col */
00319     s = l+1;     /* self is curr row, curr col */
00320     
00321     /* handle col 0 sentinel as a delete */
00322     l->bestm=WORST; 
00323     l->bestd=d->bestd-gapExtend;
00324     l->besti=WORST;                 
00325     l->backm='x';
00326     l->backd='d';
00327     l->backi='x';
00328     if (rr==1)    /* special case row 1 col 0 */
00329         {
00330         l->bestd=-gapOpen;
00331         l->backd='m';
00332         }
00333 #ifdef DEBUG
00334     dump2L(cellc); 
00335 #endif
00336     
00337     for(c=1; c<=cmost; c++)
00338         {
00339 
00340         int best=WORST;
00341         int try  =WORST;
00342         char dir=' ';
00343         /* note: is matrix symmetrical? if not we could have dim 1 and 2 backwards */
00344         int subst = ss->matrix[(int)q[c-1]][(int)t[rr-1]];  /* score for pairing target and query. */
00345 
00346         /* find best M match query and target */
00347         best=WORST;
00348         try=d->bestd;    
00349         if (try > best)
00350             {
00351             best=try;
00352             dir='d';
00353             }
00354         try=d->bestm;   
00355         if (try > best)
00356             {
00357             best=try;
00358             dir='m';
00359             }
00360         try=d->besti;   
00361         if (try > best)
00362             {
00363             best=try;
00364             dir='i';
00365             }
00366         try=0;                   /* local ali can start anywhere */
00367         if (try > best)
00368             {
00369             best=try;
00370             dir='s';         
00371             }
00372         best += subst;
00373         s->bestm = best;
00374         s->backm = dir;
00375         if (best > bestbest)
00376             {
00377             bestbest=best;
00378             *bestbestOut=best;
00379             *bestrOut=rr;
00380             *bestcOut=c;
00381             *bestdirOut=dir;
00382             }
00383 
00384         /* find best D delete in query */
00385         best=WORST;
00386         try=u->bestd - gapExtend;
00387         if (try > best)
00388             {
00389             best=try;
00390             dir='d';
00391             }
00392         try=u->bestm - gapOpen;    
00393         if (try > best)
00394             {
00395             best=try;
00396             dir='m';
00397             }
00398         try=u->besti - doubleGap;    
00399         if (try > best)
00400             {
00401             best=try;
00402             dir='i';
00403             }
00404         s->bestd = best;
00405         s->backd = dir;
00406         if (best > bestbest)
00407             {
00408             bestbest=best;
00409             *bestbestOut=best;
00410             *bestrOut=rr;
00411             *bestcOut=c;
00412             *bestdirOut=dir;
00413             }
00414 
00415         /* find best I insert in query */
00416         best=WORST;
00417         try=l->bestd - doubleGap;
00418         if (try > best)
00419             {
00420             best=try;
00421             dir='d';
00422             }
00423         try=l->bestm - gapOpen;    
00424         if (try > best)
00425             {
00426             best=try;
00427             dir='m';
00428             }
00429         try=l->besti - gapExtend;    
00430         if (try > best)
00431             {
00432             best=try;
00433             dir='i';
00434             }
00435         s->besti = best;
00436         s->backi = dir;
00437         if (best > bestbest)
00438             {
00439             bestbest=best;
00440             *bestbestOut=best;
00441             *bestrOut=rr;
00442             *bestcOut=c;
00443             *bestdirOut=dir;
00444             }
00445 
00446 #ifdef DEBUG
00447     dump2L(cellc+c); 
00448 #endif
00449 
00450         d++;l++;u++;s++;
00451 
00452         }
00453 #ifdef DEBUG
00454 printf("\n");
00455 #endif
00456   
00457     }
00458 } 
00459 
00460 
00461 
00462 struct axt *axtAffine2Level(bioSeq *query, bioSeq *target, struct axtScoreScheme *ss)
00463 /* 
00464 
00465    (Moving boundary version, allows target T size twice as large in same ram)
00466 
00467    Return alignment if any of query and target using scoring scheme. 
00468    
00469    2Level uses an economical amount of ram and should work for large target sequences.
00470    
00471    If Q is query size and T is target size and M is memory size, then
00472    Total memory used M = 30*Q*sqrt(T).  When the target is much larger than the query
00473    this method saves ram, and average runtime is only 50% greater, or 1.5 QT.  
00474    If Q=5000 and T=245,522,847 for hg17 chr1, then M = 2.2 GB ram.  
00475    axtAffine would need M=3QT = 3.4 TB.
00476    Of course massive alignments will be painfully slow anyway.
00477 
00478    Works for protein as well as DNA given the correct scoreScheme.
00479   
00480    NOTES:
00481    Double-gap cost is equal to gap-extend cost, but gap-open would also work.
00482    On very large target, score integer may overflow.
00483    Input sequences not checked for invalid chars.
00484    Input not checked but query should be shorter than target.
00485    
00486 */
00487 {
00488 struct axt *axt=needMem(sizeof(struct axt));
00489 
00490 char *q = query->dna;
00491 char *t = target->dna;
00492 
00493 int Q= query->size;
00494 int T=target->size;
00495 int lv=Q+1;                    /* Q+1 is used so often let's call it lv for q-width */
00496 int lw=T+1;                    /* T+1 is used so often let's call it lw for t-height */
00497 
00498 
00499 int r = 0;                                 /* row matrix index */
00500 int c = 0;                                 /* col matrix index */
00501 char dir=' ';                              /* dir for bt */
00502 int bestbest = WORST;                      /* best score in entire mtx */
00503 
00504 int k=0;                                   /* save every kth row (k decreasing) */
00505 int ksize = 0;                             /* T+1 saved rows as ksize, ksize-1,...,1*/
00506 int arrsize = 0;                           /* dynprg array size, +1 for 0 sentinel col. */
00507 struct cell2L *cells = NULL;               /* best score dyn prog array */
00508 int ki = 0;                                /* base offset into array */
00509 int cmost = Q;                             /* track right edge shrinkage during backtrace */
00510 int kmax = 0;                              /* rows range from ki to kmax */
00511 int rr = 0;                                /* maps ki base to actual target seq */
00512 int nrows = 0;                             /* num rows to do, usually k or less */
00513 int bestr = 0;                             /* remember best r,c,dir for local ali */
00514 int bestc = 0;           
00515 char bestdir = 0;
00516 int temp = 0;
00517 
00518 
00519 char *btq=NULL;      /* temp pointers to track ends of string while accumulating */
00520 char *btt=NULL;
00521 
00522 ksize = (int) (-1 + sqrt(8*lw+1))/2;    
00523 if (((ksize*(ksize+1))/2) < lw) 
00524     {ksize++;}
00525 arrsize = (ksize+1) * lv;                 /* dynprg array size, +1 for lastrow that moves back up. */
00526 cells = needLargeMem(arrsize * sizeof(struct cell2L));   /* best score dyn prog array */
00527 
00528 #ifdef DEBUG
00529 printf("\n k=%d \n ksize=%d \n arrsize=%d \n Q,lv=%d,%d T=%d \n \n",k,ksize,arrsize,Q,lv,T);
00530 #endif
00531 
00532 axt->next = NULL;
00533 axt->qName = cloneString(query->name);
00534 axt->tName = cloneString(target->name);
00535 axt->qStrand ='+';
00536 axt->tStrand ='+';
00537 axt->frame = 0;
00538 axt->score=0;
00539 axt->qStart=0;
00540 axt->tStart=0;
00541 axt->qEnd=0;
00542 axt->tEnd=0;
00543 axt->symCount=0;
00544 axt->qSym=NULL;
00545 axt->tSym=NULL;
00546 
00547 if ((Q==0) || (T==0))
00548     {
00549     axt->qSym=cloneString("");
00550     axt->tSym=cloneString("");
00551     freez(&cells);
00552     return axt; 
00553     }
00554 
00555 
00556 
00557 /* initialize origin corner */
00558     cells[0].bestm=0;
00559     cells[0].bestd=WORST;
00560     cells[0].besti=WORST;                 
00561     cells[0].backm='x';
00562     cells[0].backd='x';
00563     cells[0].backi='x';
00564 #ifdef DEBUG
00565     dump2L(cells); 
00566 #endif
00567 
00568 /* initialize row 0 col 1 */
00569     cells[1].bestm=WORST;
00570     cells[1].bestd=WORST;
00571     cells[1].besti=-ss->gapOpen;
00572     cells[1].backm='x';
00573     cells[1].backd='x';
00574     cells[1].backi='m';
00575 #ifdef DEBUG
00576     dump2L(cells+1); 
00577 #endif
00578 
00579 /* initialize first row of sentinels */
00580 for (c=2;c<lv;c++)
00581     {
00582     cells[c].bestm=WORST;
00583     cells[c].bestd=WORST;
00584     cells[c].besti=cells[c-1].besti-ss->gapExtend;
00585     cells[c].backm='x';
00586     cells[c].backd='x';
00587     cells[c].backi='i';
00588 #ifdef DEBUG
00589     dump2L(cells+c); 
00590 #endif
00591     }
00592 #ifdef DEBUG
00593 printf("\n");
00594 printf("\n");
00595 #endif
00596 
00597 k=ksize;
00598 
00599 ki++;  /* advance to next row */
00600 
00601 r=1;   /* r is really the rows all done */
00602 while(1)
00603     {
00604     nrows = k;  /* do k rows at a time, save every kth row on 1st pass */
00605     if (nrows > (lw-r)) {nrows=lw-r;}  /* may get less than k on last set */
00606     kmax = ki+nrows-1;
00607 
00608     kForwardAffine(cells, ki, kmax, r-ki, cmost, lv, q, t, ss, &bestbest, &bestr, &bestc, &bestdir);
00609 #ifdef DEBUG
00610 printf("\n");
00611 #endif
00612 
00613     r += nrows;
00614 
00615     if (nrows == k)   /* got full set of k rows */
00616         {
00617         /* compress, save every kth row */     
00618         /* optimize as a mem-copy */
00619         memcpy(cells+ki*lv,cells+kmax*lv,sizeof(struct cell2L) *lv);    
00620         }
00621 
00622     if (r >= lw){break;} /* we are done */
00623     
00624     ki++;
00625     k--;        /* decreasing k is "moving boundary" */
00626 }
00627 
00628 #ifdef DEBUG
00629 printf("\nFWD PASS DONE. bestbest=%d bestr=%d bestc=%d bestdir=%c \n\n",bestbest,bestr,bestc,bestdir);
00630 #endif
00631 
00632 /* start doing backtrace */
00633     
00634 /* adjust for reverse pass */
00635 
00636 /* for local we automatically skip to bestr, bestc to begin tb */
00637 
00638 if (bestbest <= 0)  /* null alignment */
00639     {
00640     bestr=0;
00641     bestc=0;
00642     /* bestdir won't matter */
00643     }
00644 
00645 r = bestr;
00646 c = bestc;
00647 dir = bestdir;
00648 cmost = c;
00649 
00650 axt->qEnd=bestc;
00651 axt->tEnd=bestr;
00652 
00653 temp = (2*ksize)+1;
00654 ki = (int)(temp-sqrt((temp*temp)-(8*r)))/2;
00655 rr = ((2*ksize*ki)+ki-(ki*ki))/2;
00656 kmax = ki+(r-rr);
00657 k = ksize - ki;
00658 
00659 
00660 /* now that we jumped back into saved start-points,
00661    let's fill the array forward and start backtrace from there.
00662 */
00663 
00664 #ifdef DEBUG
00665 printf("bestr=%d, bestc=%d, bestdir=%c k=%d, ki=%d, kmax=%d\n",bestr,bestc,bestdir,k,ki,kmax);
00666 #endif
00667 
00668 kForwardAffine(cells, ki+1, kmax, rr-ki, cmost, lv, q, t, ss, &bestbest, &bestr, &bestc, &bestdir);
00669    
00670 #ifdef DEBUG
00671 printf("\n(initial)BKWD PASS DONE. cmost=%d r=%d c=%d dir=%c \n\n",cmost,r,c,dir);
00672 #endif
00673 
00674 
00675 /* backtrace */   
00676 
00677 /* handling for resulting ali'd strings when very long */
00678 
00679 axt->symCount=0;
00680 axt->qSym = needLargeMem((Q+T+1)*sizeof(char));
00681 axt->tSym = needLargeMem((Q+T+1)*sizeof(char));
00682 btq=axt->qSym;
00683 btt=axt->tSym;
00684 while(1)
00685     {
00686     while(1)
00687         {
00688 #ifdef DEBUG
00689         printf("bt: r=%d, c=%d, dir=%c \n",r,c,dir);
00690 #endif
00691 
00692         
00693         if ((r==0) && (c==0)){break;} /* hit origin, done */
00694         if (r<rr){break;} /* ran out of targ seq, backup and reload */
00695         if (dir=='x'){errAbort("unexpected error backtracing");} /* x only at origin */
00696         if (dir=='s'){break;}   /* hit start, local ali */
00697         if (dir=='m') /* match */
00698             {
00699             *btq++=q[c-1];  /* accumulate alignment output strings */
00700             *btt++=t[r-1];  /* accumulate alignment output strings */
00701             axt->symCount++; 
00702             dir = cells[lv*(ki+r-rr)+c].backm;  /* follow backtrace */
00703             r--;            /* adjust coords to move in dir spec'd by back ptr */
00704             c--;
00705             cmost--;        /* decreases as query seq is aligned, so saves on unused areas */
00706             }
00707         else
00708             {
00709             if (dir=='d')  /* delete in query (gap) */
00710                 {
00711                 *btq++='-';     /* accumulate alignment output strings */
00712                 *btt++=t[r-1];  /* accumulate alignment output strings */
00713                 axt->symCount++; 
00714                 dir = cells[lv*(ki+r-rr)+c].backd;  /* follow backtrace */
00715                 r--;            /* adjust coords to move in dir spec'd by back ptr */
00716                 }
00717             else    /* insert in query (gap) */
00718                 {
00719                 *btq++=q[c-1];  /* accumulate alignment output strings */
00720                 *btt++='-';     /* accumulate alignment output strings */
00721                 axt->symCount++; 
00722                 dir = cells[lv*(ki+r-rr)+c].backi;  /* follow backtrace */
00723                 c--;
00724                 cmost--;        /* decreases as query seq is aligned, so saves on unused areas */
00725                 }
00726             }
00727         
00728         }
00729 
00730     /* back up and do it again */
00731     ki--;
00732     k++;   /* k grows as we move back up */ 
00733     rr-=k;
00734     kmax = ki+k-1;
00735 
00736     /* check for various termination conditions to stop main loop */
00737     if (ki < 0) {break;}
00738     if ((r==0)&&(c==0)) {break;}
00739     if (dir=='s') {break;}
00740 
00741     /* re-calculate array from previous saved kth row going back
00742        this is how we save memory, but have to regenerate half on average
00743        we are re-using the same call 
00744      */
00745 
00746 #ifdef DEBUG
00747 printf("bestr=%d, bestc=%d, bestdir=%c k=%d, ki=%d, kmax=%d\n",bestr,bestc,bestdir,k,ki,kmax);
00748 #endif
00749 
00750 
00751     kForwardAffine(cells, ki+1, kmax, rr-ki, cmost, lv, q, t, ss, &bestbest, &bestr, &bestc, &bestdir);
00752 
00753 #ifdef DEBUG
00754     printf("\nBKWD PASS DONE. cmost=%d r=%d c=%d\n\n",cmost,r,c);
00755 #endif
00756 
00757     }
00758 
00759 axt->qStart=c;
00760 axt->tStart=r;
00761 
00762 /* reverse backwards trace and zero-terminate strings */
00763 
00764 reverseBytes(axt->qSym,axt->symCount);
00765 reverseBytes(axt->tSym,axt->symCount);
00766 axt->qSym[axt->symCount]=0;
00767 axt->tSym[axt->symCount]=0;
00768 
00769 axt->score=bestbest;
00770 
00771 
00772 /* 
00773 should I test stringsize and if massively smaller, realloc string to save ram? 
00774 */
00775 
00776 freez(&cells);
00777 
00778 return axt;
00779 }
00780 
00781 
00782 

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2