lib/chainConnect.c

Go to the documentation of this file.
00001 /* chainConnect - Stuff to cope with connect costs and overlaps when 
00002  * making chains. This works closely with the chainBlock module. */
00003 
00004 #include "common.h"
00005 #include "chain.h"
00006 #include "axt.h"
00007 #include "gapCalc.h"
00008 #include "chainConnect.h"
00009 
00010 static char const rcsid[] = "$Id: chainConnect.c,v 1.3 2005/08/07 23:25:26 baertsch Exp $";
00011 
00012 double chainScoreBlock(char *q, char *t, int size, int matrix[256][256])
00013 /* Score block through matrix. */
00014 {
00015 double score = 0;
00016 int i;
00017 for (i=0; i<size; ++i)
00018     score += matrix[(int)q[i]][(int)t[i]];
00019 return score;
00020 }
00021 
00022 double chainCalcScore(struct chain *chain, struct axtScoreScheme *ss, 
00023         struct gapCalc *gapCalc, struct dnaSeq *query, struct dnaSeq *target)
00024 /* Calculate chain score freshly. */
00025 {
00026 struct cBlock *b1, *b2;
00027 double score = 0;
00028 for (b1 = chain->blockList; b1 != NULL; b1 = b2)
00029     {
00030     score += chainScoreBlock(query->dna + b1->qStart, 
00031         target->dna + b1->tStart, b1->tEnd - b1->tStart, ss->matrix);
00032     b2 = b1->next;
00033     if (b2 != NULL)
00034         score -=  gapCalcCost(gapCalc, b2->qStart - b1->qEnd, 
00035                               b2->tStart - b1->tEnd);
00036     }
00037 return score;
00038 }
00039 
00040 double chainCalcScoreSubChain(struct chain *chain, struct axtScoreScheme *ss, 
00041         struct gapCalc *gapCalc, struct dnaSeq *query, struct dnaSeq *target)
00042 /* Calculate chain score assuming query and target 
00043    span the chained region rather than entire chrom. */
00044 {
00045 struct cBlock *b1, *b2;
00046 double score = 0;
00047 for (b1 = chain->blockList; b1 != NULL; b1 = b2)
00048     {
00049     score += chainScoreBlock(query->dna + (b1->qStart) - chain->qStart, 
00050         target->dna + (b1->tStart) - chain->tStart, b1->tEnd - b1->tStart, ss->matrix);
00051     b2 = b1->next;
00052     if (b2 != NULL)
00053         score -=  gapCalcCost(gapCalc, b2->qStart - b1->qEnd, 
00054                               b2->tStart - b1->tEnd);
00055     }
00056 return score;
00057 }
00058 
00059 void cBlockFindCrossover(struct cBlock *left, struct cBlock *right,
00060         struct dnaSeq *qSeq, struct dnaSeq *tSeq,  
00061         int overlap, int matrix[256][256], int *retPos, int *retScoreAdjustment)
00062 /* Find ideal crossover point of overlapping blocks.  That is
00063  * the point where we should start using the right block rather
00064  * than the left block.  This point is an offset from the start
00065  * of the overlapping region (which is the same as the start of the
00066  * right block). */
00067 {
00068 int bestPos = 0;
00069 char *rqStart = qSeq->dna + right->qStart;
00070 char *lqStart = qSeq->dna + left->qEnd - overlap;
00071 char *rtStart = tSeq->dna + right->tStart;
00072 char *ltStart = tSeq->dna + left->tEnd - overlap;
00073 int i;
00074 double score, bestScore, rScore, lScore;
00075 
00076 /* Make sure overlap is not larger than either block size: */
00077 if (overlap > (left->tEnd - left->tStart) ||
00078     overlap > (right->tEnd - right->tStart))
00079     errAbort("overlap is %d -- too large for one of these:\n"
00080              "qSize=%d  tSize=%d\n"
00081              "left: qStart=%d qEnd=%d (%d) tStart=%d tEnd=%d (%d)\n"
00082              "right: qStart=%d qEnd=%d (%d) tStart=%d tEnd=%d (%d)\n",
00083              overlap, qSeq->size, tSeq->size,
00084              left->qStart, left->qEnd, left->qEnd - left->qStart,
00085              left->tStart, left->tEnd, left->tEnd - left->tStart,
00086              right->qStart, right->qEnd, right->qEnd - right->qStart, 
00087              right->tStart, right->tEnd, right->tEnd - right->tStart);
00088 
00089 score = bestScore = rScore = chainScoreBlock(rqStart, rtStart, overlap, matrix);
00090 lScore = chainScoreBlock(lqStart, ltStart, overlap, matrix);
00091 for (i=0; i<overlap; ++i)
00092     {
00093     score += matrix[(int)lqStart[i]][(int)ltStart[i]];
00094     score -= matrix[(int)rqStart[i]][(int)rtStart[i]];
00095     if (score > bestScore)
00096         {
00097         bestScore = score;
00098         bestPos = i+1;
00099         }
00100     }
00101 *retPos = bestPos;
00102 *retScoreAdjustment = rScore + lScore - bestScore;
00103 }
00104 
00105 
00106 int chainConnectGapCost(int dq, int dt, struct chainConnect *cc)
00107 /* Calculate cost of non-overlapping gap. */
00108 {
00109 return gapCalcCost(cc->gapCalc, dq, dt);
00110 }
00111 
00112 int chainConnectCost(struct cBlock *a, struct cBlock *b, 
00113         struct chainConnect *cc)
00114 /* Calculate connection cost - including gap score
00115  * and overlap adjustments if any. */
00116 {
00117 int dq = b->qStart - a->qEnd;
00118 int dt = b->tStart - a->tEnd;
00119 int overlapAdjustment = 0;
00120 
00121 if (a->qStart >= b->qStart || a->tStart >= b->tStart)
00122     {
00123     errAbort("a (%d %d) not strictly before b (%d %d)",
00124         a->qStart, a->tStart, b->qStart, b->tStart);
00125     }
00126 if (dq < 0 || dt < 0)
00127    {
00128    int bSize = b->qEnd - b->qStart;
00129    int aSize = a->qEnd - a->qStart;
00130    int overlap = -min(dq, dt);
00131    int crossover;
00132    if (overlap >= bSize || overlap >= aSize) 
00133        {
00134        /* One of the blocks is enclosed completely on one dimension
00135         * or other by the other.  Discourage this overlap. */
00136        overlapAdjustment = 100000000;
00137        }
00138    else
00139        {
00140        cBlockFindCrossover(a, b, cc->query, cc->target, overlap, 
00141                 cc->ss->matrix, &crossover, &overlapAdjustment);
00142        dq += overlap;
00143        dt += overlap;
00144        }
00145    }
00146 return overlapAdjustment + gapCalcCost(cc->gapCalc, dq, dt);
00147 }
00148 
00149 static void checkStartBeforeEnd(struct chain *chain, char *message)
00150 /* Check qStart < qEnd, tStart < tEnd for each block. */
00151 {
00152 struct cBlock *b;
00153 for (b = chain->blockList; b != NULL; b = b->next)
00154     {
00155     if (b->qStart >= b->qEnd || b->tStart >= b->tEnd)
00156         errAbort("Start after end in (%d %d) to (%d %d) %s",
00157             b->qStart, b->tStart, b->qEnd, b->tEnd, message);
00158     }
00159 }
00160 
00161 static void checkChainGaps(struct chain *chain, char *message)
00162 /* Check that gaps between blocks are non-negative. */
00163 {
00164 struct cBlock *a, *b;
00165 a = chain->blockList;
00166 if (a == NULL)
00167     return;
00168 for (b = a->next; b != NULL; b = b->next)
00169     {
00170     if (a->qEnd > b->qStart || a->tEnd > b->tStart)
00171         {
00172         errAbort("Negative gap between (%d %d - %d %d) and (%d %d - %d %d) %s",
00173             a->qStart, a->tStart, a->qEnd, a->tEnd, 
00174             b->qStart, b->tStart, b->qEnd, b->tEnd, message);
00175         }
00176     a = b;
00177     }
00178 }
00179 
00180 static void checkChainIncreases(struct chain *chain, char *message)
00181 /* Check that qStart and tStart both strictly increase
00182  * in chain->blockList. */
00183 {
00184 struct cBlock *a, *b;
00185 a = chain->blockList;
00186 if (a == NULL)
00187     return;
00188 for (b = a->next; b != NULL; b = b->next)
00189     {
00190     if (a->qStart >= b->qStart || a->tStart >= b->tStart)
00191         {
00192         errAbort("a (%d %d) not before b (%d %d) %s",
00193             a->qStart, a->tStart, b->qStart, b->tStart, message);
00194         }
00195     a = b;
00196     }
00197 }
00198 
00199 void chainCalcBounds(struct chain *chain)
00200 /* Recalculate chain boundaries - setting qStart/qEnd/tStart/tEnd from
00201  * a scan of blockList. */
00202 {
00203 struct cBlock *b = chain->blockList;
00204 if (chain->blockList == NULL)
00205     return;
00206 chain->qStart = b->qStart;
00207 chain->tStart = b->tStart;
00208 b = slLastEl(chain->blockList);
00209 chain->qEnd = b->qEnd;
00210 chain->tEnd = b->tEnd;
00211 }
00212 
00213 static boolean removeNegativeBlocks(struct chain *chain)
00214 /* Removing the partial overlaps occassional results
00215  * in all of a block being removed.  This routine
00216  * removes the dried up husks of these blocks 
00217  * and returns TRUE if it finds any. */
00218 {
00219 struct cBlock *newList = NULL, *b, *next;
00220 boolean gotNeg = FALSE;
00221 for (b = chain->blockList; b != NULL; b = next)
00222     {
00223     next = b->next;
00224     if (b->qStart >= b->qEnd || b->tStart >= b->tEnd)
00225         {
00226         gotNeg = TRUE;
00227         freeMem(b);
00228         }
00229     else
00230         {
00231         slAddHead(&newList, b);
00232         }
00233     }
00234 slReverse(&newList);
00235 chain->blockList = newList;
00236 if (gotNeg)
00237     chainCalcBounds(chain);
00238 return gotNeg;
00239 }
00240 
00241 void setChainBounds(struct chain *chain)
00242 /* Set chain overall bounds to fit blocks. */
00243 {
00244 struct cBlock *b = chain->blockList;
00245 chain->qStart = b->qStart;
00246 chain->tStart = b->tStart;
00247 while (b->next != NULL)
00248      b = b->next;
00249 chain->qEnd = b->qEnd;
00250 chain->tEnd = b->tEnd;
00251 }
00252 
00253 void chainRemovePartialOverlaps(struct chain *chain, 
00254         struct dnaSeq *qSeq, struct dnaSeq *tSeq, int matrix[256][256])
00255 /* If adjacent blocks overlap then find crossover points between them. */
00256 {
00257 struct cBlock *a, *b;
00258 boolean totalTrimA, totalTrimB;
00259 
00260 assert(chain->blockList != NULL);
00261 
00262 /* Do an internal sanity check to make sure that things
00263  * really are sorted by both qStart and tStart. */
00264 checkChainIncreases(chain, "before removePartialOverlaps");
00265 
00266 /* Remove overlapping portion of blocks.  In some
00267  * tricky repeating regions this can result in 
00268  * complete blocks being removed.  This complicates
00269  * the loop below in some cases, forcing us to essentially
00270  * start over when the first of the two blocks we
00271  * are examining gets trimmed out completely. */
00272 for (;;)
00273     {
00274     totalTrimA = totalTrimB = FALSE;
00275     a = chain->blockList;
00276     b = a->next;
00277     for (;;)
00278         {
00279         int dq, dt;
00280         if (b == NULL)
00281             break;
00282         dq = b->qStart - a->qEnd;
00283         dt = b->tStart - a->tEnd;
00284         if (dq < 0 || dt < 0)
00285            {
00286            int overlap = -min(dq, dt);
00287            int aSize = a->qEnd - a->qStart;
00288            int bSize = b->qEnd - b->qStart;
00289            int crossover, invCross, overlapAdjustment;
00290            if (overlap >= aSize || overlap >= bSize)
00291                {
00292                totalTrimB = TRUE;
00293                }
00294            else
00295                {
00296                cBlockFindCrossover(a, b, qSeq, tSeq, overlap, matrix, 
00297                              &crossover, &overlapAdjustment);
00298                b->qStart += crossover;
00299                b->tStart += crossover;
00300                invCross = overlap - crossover;
00301                a->qEnd -= invCross;
00302                a->tEnd -= invCross;
00303                if (b->qEnd <= b->qStart)
00304                    {
00305                    totalTrimB = TRUE;
00306                    }
00307                else if (a->qEnd <= a->qStart)
00308                    {
00309                    totalTrimA = TRUE;
00310                    }
00311                }
00312            }
00313         if (totalTrimA == TRUE)
00314             {
00315             removeNegativeBlocks(chain);
00316             break;
00317             }
00318         else if (totalTrimB == TRUE)
00319             {
00320             b = b->next;
00321             freez(&a->next);
00322             a->next = b;
00323             totalTrimB = FALSE;
00324             }
00325         else
00326             {
00327             a = b;
00328             b = b->next;
00329             }
00330         }
00331     if (!totalTrimA)
00332         break;
00333     }
00334 
00335 /* Reset chain bounds - may have clipped them in this
00336  * process. */
00337 setChainBounds(chain);
00338 
00339 /* Do internal sanity checks. */
00340 checkChainGaps(chain, "after removePartialOverlaps");
00341 checkStartBeforeEnd(chain, "after removePartialOverlaps");
00342 }
00343 
00344 void chainMergeAbutting(struct chain *chain)
00345 /* Merge together blocks in a chain that abut each
00346  * other exactly. */
00347 {
00348 struct cBlock *newList = NULL, *b, *last = NULL, *next;
00349 for (b = chain->blockList; b != NULL; b = next)
00350     {
00351     next = b->next;
00352     if (last == NULL || last->qEnd != b->qStart || last->tEnd != b->tStart)
00353         {
00354         slAddHead(&newList, b);
00355         last = b;
00356         }
00357     else
00358         {
00359         last->qEnd = b->qEnd;
00360         last->tEnd = b->tEnd;
00361         freeMem(b);
00362         }
00363     }
00364 slReverse(&newList);
00365 chain->blockList = newList;
00366 }
00367 

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2