#include "common.h"#include "dnautil.h"#include "dlist.h"#include "fuzzyFind.h"#include "localmem.h"#include "patSpace.h"#include "trans3.h"#include "supStitch.h"#include "chainBlock.h"Include dependency graph for supStitch.c:

Go to the source code of this file.
Data Structures | |
| struct | ssNode |
| struct | ssEdge |
| struct | ssGraph |
Functions | |
| static void | ssFindBestBig (struct ffAli *ffList, bioSeq *qSeq, bioSeq *tSeq, enum ffStringency stringency, boolean isProt, struct trans3 *t3List, struct ffAli **retBestAli, int *retScore, struct ffAli **retLeftovers) |
| void | ssFfItemFree (struct ssFfItem **pEl) |
| void | ssFfItemFreeList (struct ssFfItem **pList) |
| void | ssBundleFree (struct ssBundle **pEl) |
| void | ssBundleFreeList (struct ssBundle **pList) |
| void | dumpNearCrossover (char *what, DNA *n, DNA *h, int size) |
| void | dumpFf (struct ffAli *left, DNA *needle, DNA *hay) |
| void | dumpBuns (struct ssBundle *bunList) |
| static int | bioScoreMatch (boolean isProt, char *a, char *b, int size) |
| static int | findCrossover (struct ffAli *left, struct ffAli *right, int overlap, boolean isProt) |
| static void | trans3Offsets (struct trans3 *t3List, AA *startP, AA *endP, int *retStart, int *retEnd) |
| static boolean | isMonotonic (struct ffAli *left) |
| static struct ffAli * | forceMonotonic (struct ffAli *aliList, struct dnaSeq *qSeq, struct dnaSeq *tSeq, enum ffStringency stringency, boolean isProt, struct trans3 *t3List) |
| ffAli * | smallMiddleExons (struct ffAli *aliList, struct ssBundle *bundle, enum ffStringency stringency) |
| static void | ssGraphFree (struct ssGraph **pGraph) |
| boolean | tripleCanFollow (struct ffAli *a, struct ffAli *b, aaSeq *qSeq, struct trans3 *t3List) |
| static struct ssGraph * | ssGraphMake (struct ffAli *ffList, bioSeq *qSeq, enum ffStringency stringency, boolean isProt, struct trans3 *t3List) |
| static struct ssNode * | ssDynamicProgram (struct ssGraph *graph) |
| static void | ssGraphFindBest (struct ssGraph *graph, struct ffAli **retBestAli, int *retScore, struct ffAli **retLeftovers) |
| static void | ssFindBestSmall (struct ffAli *ffList, bioSeq *qSeq, bioSeq *tSeq, enum ffStringency stringency, boolean isProt, struct trans3 *t3List, struct ffAli **retBestAli, int *retScore, struct ffAli **retLeftovers) |
| static int | ssGapCost (int dq, int dt, void *data) |
| static int | findOverlap (struct cBlock *a, struct cBlock *b) |
| static int | ssConnectCost (struct cBlock *a, struct cBlock *b, void *data) |
| static void | ssFindBest (struct ffAli *ffList, bioSeq *qSeq, bioSeq *tSeq, enum ffStringency stringency, boolean isProt, struct trans3 *t3List, struct ffAli **retBestAli, int *retScore, struct ffAli **retLeftovers) |
| ffAli * | cutAtBigIntrons (struct ffAli *ffList, int maxIntron, int *pScore, enum ffStringency stringency, struct ffAli **returnLeftovers) |
| void | ssStitch (struct ssBundle *bundle, enum ffStringency stringency, int minScore, int maxToReturn) |
| static struct ssBundle * | findBundle (struct ssBundle *list, struct dnaSeq *genoSeq) |
| ssBundle * | ssFindBundles (struct patSpace *ps, struct dnaSeq *cSeq, char *cName, enum ffStringency stringency, boolean avoidSelfSelf) |
Variables | |
| static char const | rcsid [] = "$Id: supStitch.c,v 1.38 2007/04/20 22:43:37 kent Exp $" |
| static enum ffStringency | ssStringency |
| static boolean | ssIsProt |
| static int bioScoreMatch | ( | boolean | isProt, | |
| char * | a, | |||
| char * | b, | |||
| int | size | |||
| ) | [static] |
Definition at line 111 of file supStitch.c.
References dnaOrAaScoreMatch().
Referenced by ssConnectCost(), ssFindBestBig(), and ssGraphMake().
00113 { 00114 if (isProt) 00115 { 00116 return dnaOrAaScoreMatch(a, b, size, 2, -1, 'X'); 00117 } 00118 else 00119 { 00120 return dnaOrAaScoreMatch(a, b, size, 1, -1, 'n'); 00121 } 00122 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct ffAli* cutAtBigIntrons | ( | struct ffAli * | ffList, | |
| int | maxIntron, | |||
| int * | pScore, | |||
| enum ffStringency | stringency, | |||
| struct ffAli ** | returnLeftovers | |||
| ) | [read] |
Definition at line 719 of file supStitch.c.
References ffCat(), ffScore(), ffAli::hEnd, ffAli::hStart, ffAli::left, and ffAli::right.
Referenced by ssStitch().
00724 { 00725 struct ffAli *prevFf, *ff, *cutFf = NULL; 00726 prevFf = ffList; 00727 for (ff = prevFf->right; ff != NULL; ff = ff->right) 00728 { 00729 int dt = ff->hStart - prevFf->hEnd; 00730 if (dt > maxIntron) 00731 { 00732 cutFf = prevFf; 00733 break; 00734 } 00735 prevFf = ff; 00736 } 00737 if (cutFf != NULL) 00738 { 00739 ff = cutFf->right; 00740 cutFf->right = NULL; 00741 ff->left = NULL; 00742 ffCat(returnLeftovers, &ff); 00743 *pScore = ffScore(ffList, stringency); 00744 } 00745 return ffList; 00746 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void dumpBuns | ( | struct ssBundle * | bunList | ) |
Definition at line 93 of file supStitch.c.
References dnaSeq::dna, dumpFf(), ssFfItem::ff, ssBundle::ffList, ssBundle::genoSeq, ssFfItem::next, ssBundle::next, ssBundle::qSeq, and slCount().
00095 { 00096 struct ssBundle *bun; 00097 struct ssFfItem *ffl; 00098 for (bun = bunList; bun != NULL; bun = bun->next) 00099 { 00100 struct dnaSeq *qSeq = bun->qSeq; /* Query sequence (not owned by bundle.) */ 00101 struct dnaSeq *genoSeq = bun->genoSeq; /* Genomic sequence (not owned by bundle.) */ 00102 printf("Bundle of %d between %s and %s\n", slCount(bun->ffList), qSeq->name, genoSeq->name); 00103 for (ffl = bun->ffList; ffl != NULL; ffl = ffl->next) 00104 { 00105 dumpFf(ffl->ff, bun->qSeq->dna, bun->genoSeq->dna); 00106 } 00107 } 00108 }
Here is the call graph for this function:

Definition at line 81 of file supStitch.c.
References ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, ffAli::nStart, and ffAli::right.
00083 { 00084 struct ffAli *ff; 00085 for (ff = left; ff != NULL; ff = ff->right) 00086 { 00087 printf("(%ld - %ld)[%ld-%ld] ", (long)(ff->hStart-hay), (long)(ff->hEnd-hay), 00088 (long)(ff->nStart - needle), (long)(ff->nEnd - needle)); 00089 } 00090 printf("\n"); 00091 }
Definition at line 70 of file supStitch.c.
References mustWrite().
00072 { 00073 printf("%s: ", what); 00074 mustWrite(stdout, n, size); 00075 printf("\n"); 00076 printf("%s: ", what); 00077 mustWrite(stdout, h, size); 00078 printf("\n"); 00079 }
Here is the call graph for this function:

| static struct ssBundle* findBundle | ( | struct ssBundle * | list, | |
| struct dnaSeq * | genoSeq | |||
| ) | [static, read] |
Definition at line 831 of file supStitch.c.
References ssBundle::genoSeq, and ssBundle::next.
Referenced by ssFindBundles().
00835 { 00836 struct ssBundle *el; 00837 for (el = list; el != NULL; el = el->next) 00838 if (el->genoSeq == genoSeq) 00839 return el; 00840 return NULL; 00841 }
Here is the caller graph for this function:

| static int findCrossover | ( | struct ffAli * | left, | |
| struct ffAli * | right, | |||
| int | overlap, | |||
| boolean | isProt | |||
| ) | [static] |
Definition at line 125 of file supStitch.c.
References aaScore2(), aaScoreMatch(), dnaScore2(), dnaScoreMatch(), ffAli::hEnd, ffAli::hStart, and ffAli::nStart.
Referenced by ssConnectCost(), ssFindBestBig(), and ssGraphMake().
00131 { 00132 int bestPos = 0; 00133 char *nStart = right->nStart; 00134 char *lhStart = left->hEnd - overlap; 00135 char *rhStart = right->hStart; 00136 int i; 00137 int (*scoreMatch)(char a, char b); 00138 int score, bestScore; 00139 00140 if (isProt) 00141 { 00142 scoreMatch = aaScore2; 00143 score = bestScore = aaScoreMatch(nStart, rhStart, overlap); 00144 } 00145 else 00146 { 00147 scoreMatch = dnaScore2; 00148 score = bestScore = dnaScoreMatch(nStart, rhStart, overlap); 00149 } 00150 00151 for (i=0; i<overlap; ++i) 00152 { 00153 char n = nStart[i]; 00154 score += scoreMatch(lhStart[i], n); 00155 score -= scoreMatch(rhStart[i], n); 00156 if (score > bestScore) 00157 { 00158 bestScore = score; 00159 bestPos = i+1; 00160 } 00161 } 00162 return bestPos; 00163 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 553 of file supStitch.c.
References min, cBlock::qEnd, cBlock::qStart, cBlock::tEnd, and cBlock::tStart.
Referenced by ssConnectCost(), and ssFindBestBig().
00555 { 00556 int dq = b->qStart - a->qEnd; 00557 int dt = b->tStart - a->tEnd; 00558 return -min(dq, dt); 00559 }
Here is the caller graph for this function:

| static struct ffAli* forceMonotonic | ( | struct ffAli * | aliList, | |
| struct dnaSeq * | qSeq, | |||
| struct dnaSeq * | tSeq, | |||
| enum ffStringency | stringency, | |||
| boolean | isProt, | |||
| struct trans3 * | t3List | |||
| ) | [static, read] |
Definition at line 208 of file supStitch.c.
References ffFreeAli(), isMonotonic(), and ssFindBestBig().
Referenced by smallMiddleExons(), ssFindBestSmall(), and ssStitch().
00213 { 00214 if (!isProt) 00215 { 00216 if (!isMonotonic(aliList)) 00217 { 00218 struct ffAli *leftovers = NULL; 00219 int score; 00220 ssFindBestBig(aliList, qSeq, tSeq, stringency, isProt, t3List, &aliList, &score, 00221 &leftovers); 00222 ffFreeAli(&leftovers); 00223 } 00224 } 00225 return aliList; 00226 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean isMonotonic | ( | struct ffAli * | left | ) | [static] |
Definition at line 189 of file supStitch.c.
References FALSE, ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, ffAli::nStart, ffAli::right, TRUE, and verbose().
Referenced by forceMonotonic().
00191 { 00192 struct ffAli *right; 00193 00194 while ((right = left->right) != NULL) 00195 { 00196 if (left->nEnd <= right->nStart && left->hEnd <= right->hStart) 00197 left = right; 00198 else 00199 { 00200 verbose(2, "not monotonic dn %d, dh %d\n", (int)(right->nStart - left->nEnd), 00201 (int)(right->hStart - right->hEnd)); 00202 return FALSE; 00203 } 00204 } 00205 return TRUE; 00206 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct ffAli* smallMiddleExons | ( | struct ffAli * | aliList, | |
| struct ssBundle * | bundle, | |||
| enum ffStringency | stringency | |||
| ) | [read] |
Definition at line 228 of file supStitch.c.
References ffFind(), ffRightmost(), forceMonotonic(), ssBundle::genoSeq, ffAli::hEnd, ffAli::hStart, ssBundle::isProt, ffAli::left, ffAli::nEnd, ffAli::nStart, ssBundle::qSeq, ffAli::right, and ssBundle::t3List.
Referenced by ssStitch().
00232 { 00233 if (bundle->t3List != NULL) 00234 return aliList; /* Can't handle intense translated stuff. */ 00235 else 00236 { 00237 struct dnaSeq *qSeq = bundle->qSeq; 00238 struct dnaSeq *genoSeq = bundle->genoSeq; 00239 struct ffAli *right, *left = NULL, *newLeft, *newRight; 00240 00241 left = aliList; 00242 for (right = aliList->right; right != NULL; right = right->right) 00243 { 00244 if (right->hStart - left->hEnd >= 3 && right->nStart - left->nEnd >= 3) 00245 { 00246 newLeft = ffFind(left->nEnd, right->nStart, left->hEnd, right->hStart, stringency); 00247 if (newLeft != NULL) 00248 { 00249 newLeft = forceMonotonic(newLeft, qSeq, genoSeq, 00250 stringency, bundle->isProt, bundle->t3List ); 00251 newRight = ffRightmost(newLeft); 00252 if (left != NULL) 00253 { 00254 left->right = newLeft; 00255 newLeft->left = left; 00256 } 00257 else 00258 { 00259 aliList = newLeft; 00260 } 00261 if (right != NULL) 00262 { 00263 right->left = newRight; 00264 newRight->right = right; 00265 } 00266 } 00267 } 00268 left = right; 00269 } 00270 } 00271 return aliList; 00272 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void ssBundleFree | ( | struct ssBundle ** | pEl | ) |
Definition at line 47 of file supStitch.c.
References ssBundle::ffList, freez(), and ssFfItemFreeList().
Referenced by foldInExtras(), gfAlignSomeClumps(), gfAlignStrand(), gfAlignTrans(), gfAlignTransTrans(), gfFindAlignAaTrans(), and ssBundleFreeList().
00049 { 00050 struct ssBundle *el; 00051 if ((el = *pEl) != NULL) 00052 { 00053 ssFfItemFreeList(&el->ffList); 00054 freez(pEl); 00055 } 00056 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void ssBundleFreeList | ( | struct ssBundle ** | pList | ) |
Definition at line 58 of file supStitch.c.
References ssBundle::next, and ssBundleFree().
Referenced by addToBigBundleList().
00060 { 00061 struct ssBundle *el, *next; 00062 for (el = *pList; el != NULL; el = next) 00063 { 00064 next = el->next; 00065 ssBundleFree(&el); 00066 } 00067 *pList = NULL; 00068 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 561 of file supStitch.c.
References bioScoreMatch(), cBlock::data, findCrossover(), findOverlap(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, cBlock::qEnd, cBlock::qStart, cBlock::score, ssGapCost(), ssIsProt, cBlock::tEnd, and cBlock::tStart.
Referenced by ssFindBestBig().
00564 { 00565 struct ffAli *aFf = a->data, *bFf = b->data; 00566 int overlapAdjustment = 0; 00567 int overlap = findOverlap(a, b); 00568 int dq = b->qStart - a->qEnd; 00569 int dt = b->tStart - a->tEnd; 00570 00571 if (overlap > 0) 00572 { 00573 int aSize = aFf->hEnd - aFf->hStart; 00574 int bSize = bFf->hEnd - bFf->hStart; 00575 if (overlap >= bSize || overlap >= aSize) 00576 { 00577 /* Give stiff overlap adjustment for case where 00578 * one block completely enclosed in the other on 00579 * either sequence. This will make it never happen. */ 00580 overlapAdjustment = a->score + b->score; 00581 } 00582 else 00583 { 00584 /* More normal case - partial overlap on one or both strands. */ 00585 int crossover = findCrossover(aFf, bFf, overlap, ssIsProt); 00586 int remain = overlap - crossover; 00587 overlapAdjustment = 00588 bioScoreMatch(ssIsProt, aFf->nEnd - remain, aFf->hEnd - remain, 00589 remain) 00590 + bioScoreMatch(ssIsProt, bFf->nStart, bFf->hStart, 00591 crossover); 00592 dq -= overlap; 00593 dt -= overlap; 00594 } 00595 } 00596 return overlapAdjustment + ssGapCost(dq, dt, data); 00597 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 426 of file supStitch.c.
References ssNode::cumScore, ssEdge::next, ssGraph::nodeCount, ssEdge::nodeIn, ssGraph::nodes, ssEdge::score, and ssNode::waysIn.
Referenced by ssGraphFindBest().
00430 { 00431 int veryBestScore = -0x3fffffff; 00432 struct ssNode *veryBestNode = NULL; 00433 int nodeIx; 00434 00435 for (nodeIx = 1; nodeIx <= graph->nodeCount; ++nodeIx) 00436 { 00437 int bestScore = -0x3fffffff; 00438 int score; 00439 struct ssEdge *bestEdge = NULL; 00440 struct ssNode *curNode = &graph->nodes[nodeIx]; 00441 struct ssEdge *edge; 00442 00443 for (edge = curNode->waysIn; edge != NULL; edge = edge->next) 00444 { 00445 score = edge->score + edge->nodeIn->cumScore; 00446 if (score > bestScore) 00447 { 00448 bestScore = score; 00449 bestEdge = edge; 00450 } 00451 } 00452 curNode->bestWayIn = bestEdge; 00453 curNode->cumScore = bestScore; 00454 if (bestScore >= veryBestScore) 00455 { 00456 veryBestScore = bestScore; 00457 veryBestNode = curNode; 00458 } 00459 } 00460 return veryBestNode; 00461 }
Here is the caller graph for this function:

| void ssFfItemFree | ( | struct ssFfItem ** | pEl | ) |
Definition at line 24 of file supStitch.c.
References ssFfItem::ff, ffFreeAli(), and freez().
Referenced by ssFfItemFreeList().
00026 { 00027 struct ssFfItem *el; 00028 if ((el = *pEl) != NULL) 00029 { 00030 ffFreeAli(&el->ff); 00031 freez(pEl); 00032 } 00033 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void ssFfItemFreeList | ( | struct ssFfItem ** | pList | ) |
Definition at line 35 of file supStitch.c.
References ssFfItem::next, and ssFfItemFree().
Referenced by ssBundleFree().
00037 { 00038 struct ssFfItem *el, *next; 00039 for (el = *pList; el != NULL; el = next) 00040 { 00041 next = el->next; 00042 ssFfItemFree(&el); 00043 } 00044 *pList = NULL; 00045 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void ssFindBest | ( | struct ffAli * | ffList, | |
| bioSeq * | qSeq, | |||
| bioSeq * | tSeq, | |||
| enum ffStringency | stringency, | |||
| boolean | isProt, | |||
| struct trans3 * | t3List, | |||
| struct ffAli ** | retBestAli, | |||
| int * | retScore, | |||
| struct ffAli ** | retLeftovers | |||
| ) | [static] |
Definition at line 701 of file supStitch.c.
References ffAliCount(), ssFindBestBig(), and ssFindBestSmall().
Referenced by ssStitch().
00705 { 00706 int count = ffAliCount(ffList); 00707 if (count >= 10) 00708 { 00709 ssFindBestBig(ffList, qSeq, tSeq, stringency, isProt, t3List, 00710 retBestAli, retScore, retLeftovers); 00711 } 00712 else 00713 { 00714 ssFindBestSmall(ffList, qSeq, tSeq, stringency, isProt, t3List, 00715 retBestAli, retScore, retLeftovers); 00716 } 00717 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void ssFindBestBig | ( | struct ffAli * | ffList, | |
| bioSeq * | qSeq, | |||
| bioSeq * | tSeq, | |||
| enum ffStringency | stringency, | |||
| boolean | isProt, | |||
| struct trans3 * | t3List, | |||
| struct ffAli ** | retBestAli, | |||
| int * | retScore, | |||
| struct ffAli ** | retLeftovers | |||
| ) | [static] |
Definition at line 600 of file supStitch.c.
References BIGNUM, bioScoreMatch(), chain::blockList, chainBlocks(), chainFreeList(), dnaSeq::dna, ffMakeRightLinks(), findCrossover(), findOverlap(), ffAli::hEnd, ffAli::hStart, ffAli::left, lm, lmAllocVar, lmCleanup(), lmInit(), dnaSeq::name, ffAli::nEnd, chain::next, cBlock::next, ffAli::nStart, ffAli::right, chain::score, dnaSeq::size, slAddHead, ssConnectCost(), ssGapCost(), ssIsProt, ssStringency, chain::tEnd, and trans3Offsets().
Referenced by forceMonotonic(), and ssFindBest().
00605 { 00606 struct cBlock *boxList = NULL, *box, *prevBox; 00607 struct ffAli *ff, *farRight = NULL; 00608 struct lm *lm = lmInit(0); 00609 int boxSize; 00610 DNA *firstH = tSeq->dna; 00611 struct chain *chainList, *chain, *bestChain; 00612 int tMin = BIGNUM, tMax = -BIGNUM; 00613 00614 00615 /* Make up box list for chainer. */ 00616 for (ff = ffList; ff != NULL; ff = ff->right) 00617 { 00618 lmAllocVar(lm, box); 00619 boxSize = ff->nEnd - ff->nStart; 00620 box->qStart = ff->nStart - qSeq->dna; 00621 box->qEnd = box->qStart + boxSize; 00622 if (t3List) 00623 { 00624 trans3Offsets(t3List, ff->hStart, ff->hEnd, &box->tStart, &box->tEnd); 00625 } 00626 else 00627 { 00628 box->tStart = ff->hStart - firstH; 00629 box->tEnd = box->tStart + boxSize; 00630 } 00631 box->data = ff; 00632 box->score = bioScoreMatch(isProt, ff->nStart, ff->hStart, boxSize); 00633 if (tMin > box->tStart) tMin = box->tStart; 00634 if (tMax < box->tEnd) tMax = box->tEnd; 00635 slAddHead(&boxList, box); 00636 } 00637 00638 /* Adjust boxes so that tMin is always 0. */ 00639 for (box = boxList; box != NULL; box = box->next) 00640 { 00641 box->tStart -= tMin; 00642 box->tEnd -= tMin; 00643 } 00644 tMax -= tMin; 00645 00646 ssStringency = stringency; 00647 ssIsProt = isProt; 00648 chainList = chainBlocks(qSeq->name, qSeq->size, '+', "tSeq", tMax, &boxList, 00649 ssConnectCost, ssGapCost, NULL, NULL); 00650 00651 /* Fixup crossovers on best (first) chain. */ 00652 bestChain = chainList; 00653 prevBox = bestChain->blockList; 00654 for (box = prevBox->next; box != NULL; box = box->next) 00655 { 00656 int overlap = findOverlap(prevBox, box); 00657 if (overlap > 0) 00658 { 00659 struct ffAli *left = prevBox->data; 00660 struct ffAli *right = box->data; 00661 int crossover = findCrossover(left, right, overlap, isProt); 00662 int remain = overlap - crossover; 00663 left->nEnd -= remain; 00664 left->hEnd -= remain; 00665 right->nStart += crossover; 00666 right->hStart += crossover; 00667 } 00668 prevBox = box; 00669 } 00670 00671 /* Copy stuff from first chain to bestAli. */ 00672 farRight = NULL; 00673 for (box = chainList->blockList; box != NULL; box = box->next) 00674 { 00675 ff = box->data; 00676 ff->left = farRight; 00677 farRight = ff; 00678 } 00679 *retBestAli = ffMakeRightLinks(farRight); 00680 00681 /* Copy stuff from other chains to leftovers. */ 00682 farRight = NULL; 00683 for (chain = chainList->next; chain != NULL; chain = chain->next) 00684 { 00685 for (box = chain->blockList; box != NULL; box = box->next) 00686 { 00687 ff = box->data; 00688 ff->left = farRight; 00689 farRight = ff; 00690 } 00691 } 00692 *retLeftovers = ffMakeRightLinks(farRight); 00693 00694 *retScore = bestChain->score; 00695 for (chain = chainList; chain != NULL; chain = chain->next) 00696 chain->blockList = NULL; /* Don't want to free this, it's local. */ 00697 chainFreeList(&chainList); 00698 lmCleanup(&lm); 00699 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void ssFindBestSmall | ( | struct ffAli * | ffList, | |
| bioSeq * | qSeq, | |||
| bioSeq * | tSeq, | |||
| enum ffStringency | stringency, | |||
| boolean | isProt, | |||
| struct trans3 * | t3List, | |||
| struct ffAli ** | retBestAli, | |||
| int * | retScore, | |||
| struct ffAli ** | retLeftovers | |||
| ) | [static] |
Definition at line 524 of file supStitch.c.
References forceMonotonic(), ssGraphFindBest(), ssGraphFree(), and ssGraphMake().
Referenced by ssFindBest().
00529 { 00530 struct ssGraph *graph = ssGraphMake(ffList, qSeq, stringency, isProt, t3List); 00531 ssGraphFindBest(graph, retBestAli, retScore, retLeftovers); 00532 *retBestAli = forceMonotonic(*retBestAli, qSeq, tSeq, stringency, isProt, t3List); 00533 ssGraphFree(&graph); 00534 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct ssBundle* ssFindBundles | ( | struct patSpace * | ps, | |
| struct dnaSeq * | cSeq, | |||
| char * | cName, | |||
| enum ffStringency | stringency, | |||
| boolean | avoidSelfSelf | |||
| ) | [read] |
Definition at line 843 of file supStitch.c.
References AllocVar, patClump::bacIx, dnaSeq::dna, ssFfItem::ff, ffFind(), findBundle(), dnaSeq::name, patClump::next, patSpaceFindOne(), sameString, patClump::seq, patClump::seqIx, dnaSeq::size, patClump::size, slAddHead, slFreeList(), and patClump::start.
00846 { 00847 struct patClump *clumpList, *clump; 00848 struct ssBundle *bundleList = NULL, *bun = NULL; 00849 DNA *cdna = cSeq->dna; 00850 int totalCdnaSize = cSeq->size; 00851 DNA *endCdna = cdna+totalCdnaSize; 00852 struct ssFfItem *ffl; 00853 struct dnaSeq *lastSeq = NULL; 00854 int maxSize = 700; 00855 int preferredSize = 500; 00856 int overlapSize = 250; 00857 00858 for (;;) 00859 { 00860 int cSize = endCdna - cdna; 00861 if (cSize > maxSize) 00862 cSize = preferredSize; 00863 clumpList = patSpaceFindOne(ps, cdna, cSize); 00864 for (clump = clumpList; clump != NULL; clump = clump->next) 00865 { 00866 struct ffAli *ff; 00867 struct dnaSeq *seq = clump->seq; 00868 DNA *tStart = seq->dna + clump->start; 00869 if (!avoidSelfSelf || !sameString(seq->name, cSeq->name)) 00870 { 00871 ff = ffFind(cdna, cdna+cSize, tStart, tStart + clump->size, stringency); 00872 if (ff != NULL) 00873 { 00874 if (lastSeq != seq) 00875 { 00876 lastSeq = seq; 00877 if ((bun = findBundle(bundleList, seq)) == NULL) 00878 { 00879 AllocVar(bun); 00880 bun->qSeq = cSeq; 00881 bun->genoSeq = seq; 00882 bun->genoIx = clump->bacIx; 00883 bun->genoContigIx = clump->seqIx; 00884 slAddHead(&bundleList, bun); 00885 } 00886 } 00887 AllocVar(ffl); 00888 ffl->ff = ff; 00889 slAddHead(&bun->ffList, ffl); 00890 } 00891 } 00892 } 00893 cdna += cSize; 00894 if (cdna >= endCdna) 00895 break; 00896 cdna -= overlapSize; 00897 slFreeList(&clumpList); 00898 } 00899 slReverse(&bundleList); 00900 cdna = cSeq->dna; 00901 00902 for (bun = bundleList; bun != NULL; bun = bun->next) 00903 { 00904 ssStitch(bun, stringency, 20, 16); 00905 } 00906 return bundleList; 00907 }
Here is the call graph for this function:

| static int ssGapCost | ( | int | dq, | |
| int | dt, | |||
| void * | data | |||
| ) | [static] |
Definition at line 541 of file supStitch.c.
References ffCalcGapPenalty(), and ssStringency.
Referenced by ssConnectCost(), and ssFindBestBig().
00544 { 00545 int cost; 00546 if (dt < 0) dt = 0; 00547 if (dq < 0) dq = 0; 00548 cost = ffCalcGapPenalty(dt, dq, ssStringency); 00549 return cost; 00550 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void ssGraphFindBest | ( | struct ssGraph * | graph, | |
| struct ffAli ** | retBestAli, | |||
| int * | retScore, | |||
| struct ffAli ** | retLeftovers | |||
| ) | [static] |
Definition at line 463 of file supStitch.c.
References ssNode::bestWayIn, ssEdge::crossover, ssNode::cumScore, ssNode::ff, ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, ssEdge::nodeIn, ssGraph::nodes, ffAli::nStart, ssEdge::overlap, ffAli::right, and ssDynamicProgram().
Referenced by ssFindBestSmall().
00467 { 00468 struct ssEdge *edge; 00469 struct ssNode *node; 00470 struct ssNode *startNode = &graph->nodes[0]; 00471 struct ffAli *bestAli = NULL, *leftovers = NULL; 00472 struct ffAli *ff, *left = NULL; 00473 int i; 00474 int overlap, crossover, crossDif; 00475 00476 /* Find best path and save score. */ 00477 node = ssDynamicProgram(graph); 00478 *retScore = node->cumScore; 00479 00480 /* Trace back and trim overlaps. */ 00481 while (node != startNode) 00482 { 00483 ff = node->ff; 00484 ff->right = bestAli; 00485 bestAli = ff; 00486 node->ff = NULL; 00487 edge = node->bestWayIn; 00488 if ((overlap = edge->overlap) > 0) 00489 { 00490 left = edge->nodeIn->ff; 00491 crossover = edge->crossover; 00492 crossDif = overlap-crossover; 00493 left->hEnd -= crossDif; 00494 left->nEnd -= crossDif; 00495 ff->hStart += crossover; 00496 ff->nStart += crossover; 00497 } 00498 node = node->bestWayIn->nodeIn; 00499 } 00500 00501 /* Make left links. */ 00502 left = NULL; 00503 for (ff = bestAli; ff != NULL; ff = ff->right) 00504 { 00505 ff->left = left; 00506 left = ff; 00507 } 00508 00509 /* Make leftover list. */ 00510 for (i=1, node=graph->nodes+1; i<=graph->nodeCount; ++i,++node) 00511 { 00512 if ((ff = node->ff) != NULL) 00513 { 00514 ff->left = leftovers; 00515 leftovers = ff; 00516 } 00517 } 00518 leftovers = ffMakeRightLinks(leftovers); 00519 00520 *retBestAli = bestAli; 00521 *retLeftovers = leftovers; 00522 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void ssGraphFree | ( | struct ssGraph ** | pGraph | ) | [static] |
Definition at line 304 of file supStitch.c.
References ssGraph::edges, freeMem(), freez(), and ssGraph::nodes.
Referenced by ssFindBestSmall().
00306 { 00307 struct ssGraph *graph; 00308 if ((graph = *pGraph) != NULL) 00309 { 00310 freeMem(graph->nodes); 00311 freeMem(graph->edges); 00312 freez(pGraph); 00313 } 00314 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct ssGraph* ssGraphMake | ( | struct ffAli * | ffList, | |
| bioSeq * | qSeq, | |||
| enum ffStringency | stringency, | |||
| boolean | isProt, | |||
| struct trans3 * | t3List | |||
| ) | [static, read] |
Definition at line 326 of file supStitch.c.
References AllocArray, AllocVar, bioScoreMatch(), ssEdge::crossover, ssGraph::edgeCount, ssGraph::edges, ffAliCount(), ffCalcGapPenalty(), findCrossover(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ssGraph::nodeCount, ssEdge::nodeIn, ssGraph::nodes, ssNode::nodeScore, ffAli::nStart, ssEdge::overlap, ffAli::right, ssEdge::score, slAddHead, slReverse(), trans3Offsets(), tripleCanFollow(), and ssNode::waysIn.
Referenced by ssFindBestSmall().
00329 { 00330 int nodeCount = ffAliCount(ffList); 00331 int maxEdgeCount = (nodeCount+1)*(nodeCount)/2; 00332 int edgeCount = 0; 00333 struct ssEdge *edges, *e; 00334 struct ssNode *nodes; 00335 struct ssGraph *graph; 00336 struct ffAli *ff, *mid; 00337 int i, midIx; 00338 int overlap; 00339 boolean canFollow; 00340 00341 if (nodeCount == 1) 00342 maxEdgeCount = 1; 00343 00344 AllocVar(graph); 00345 graph->nodeCount = nodeCount; 00346 graph->nodes = AllocArray(nodes, nodeCount+1); 00347 for (i=1, ff = ffList; i<=nodeCount; ++i, ff = ff->right) 00348 { 00349 nodes[i].ff = ff; 00350 nodes[i].nodeScore = bioScoreMatch(isProt, ff->nStart, ff->hStart, ff->hEnd - ff->hStart); 00351 } 00352 00353 graph->edges = AllocArray(edges, maxEdgeCount); 00354 for (mid = ffList, midIx=1; mid != NULL; mid = mid->right, ++midIx) 00355 { 00356 int midScore; 00357 struct ssNode *midNode = &nodes[midIx]; 00358 e = &edges[edgeCount++]; 00359 assert(edgeCount <= maxEdgeCount); 00360 e->nodeIn = &nodes[0]; 00361 e->score = midScore = midNode->nodeScore; 00362 midNode->waysIn = e; 00363 for (ff = ffList,i=1; ff != mid; ff = ff->right,++i) 00364 { 00365 int mhStart = 0, mhEnd = 0; 00366 if (t3List) 00367 { 00368 canFollow = tripleCanFollow(ff, mid, qSeq, t3List); 00369 trans3Offsets(t3List, mid->hStart, mid->hEnd, &mhStart, &mhEnd); 00370 } 00371 else 00372 { 00373 canFollow = (ff->nStart < mid->nStart && ff->nEnd < mid->nEnd 00374 && ff->hStart < mid->hStart && ff->hEnd < mid->hEnd); 00375 } 00376 if (canFollow) 00377 { 00378 struct ssNode *ffNode = &nodes[i]; 00379 int score; 00380 int hGap; 00381 int nGap; 00382 int crossover; 00383 00384 nGap = mid->nStart - ff->nEnd; 00385 if (t3List) 00386 { 00387 int fhStart, fhEnd; 00388 trans3Offsets(t3List, ff->hStart, ff->hEnd, &fhStart, &fhEnd); 00389 hGap = mhStart - fhEnd; 00390 } 00391 else 00392 { 00393 hGap = mid->hStart - ff->hEnd; 00394 } 00395 e = &edges[edgeCount++]; 00396 assert(edgeCount <= maxEdgeCount); 00397 e->nodeIn = ffNode; 00398 e->overlap = overlap = -nGap; 00399 if (overlap > 0) 00400 { 00401 int midSize = mid->hEnd - mid->hStart; 00402 int ffSize = ff->hEnd - ff->hStart; 00403 int newMidScore, newFfScore; 00404 e->crossover = crossover = findCrossover(ff, mid, overlap, isProt); 00405 newMidScore = bioScoreMatch(isProt, mid->nStart, mid->hStart, midSize-overlap+crossover); 00406 newFfScore = bioScoreMatch(isProt, ff->nStart+crossover, ff->hStart+crossover, 00407 ffSize-crossover); 00408 score = newMidScore - ffNode->nodeScore + newFfScore; 00409 nGap = 0; 00410 hGap -= overlap; 00411 } 00412 else 00413 { 00414 score = midScore; 00415 } 00416 score -= ffCalcGapPenalty(hGap, nGap, stringency); 00417 e->score = score; 00418 slAddHead(&midNode->waysIn, e); 00419 } 00420 } 00421 slReverse(&midNode->waysIn); 00422 } 00423 return graph; 00424 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void ssStitch | ( | struct ssBundle * | bundle, | |
| enum ffStringency | stringency, | |||
| int | minScore, | |||
| int | maxToReturn | |||
| ) |
Definition at line 748 of file supStitch.c.
References AllocVar, ssBundle::avoidFuzzyFindKludge, cutAtBigIntrons(), dnaSeq::dna, FALSE, ssFfItem::ff, ffAliSort(), ffCat(), ffCdna, ffCmpHitsNeedleFirst(), ffFreeAli(), ffIntronMax, ffIntronMaxDefault, ssBundle::ffList, ffMergeClose(), ffMergeHayOverlaps(), ffMergeNeedleAlis(), ffRemoveEmptyAlis(), ffSlideIntrons(), forceMonotonic(), ssBundle::genoSeq, ssBundle::isProt, ssFfItem::next, ssBundle::qSeq, slAddHead, slFreeList(), slReverse(), smallMiddleExons(), ssFindBest(), ssBundle::t3List, and TRUE.
Referenced by ffSeedExtInMem(), foldInExtras(), gfAlignSomeClumps(), gfAlignStrand(), gfAlignTrans(), gfAlignTransTrans(), gfClumpsToBundles(), gfFindAlignAaTrans(), gfTransTransFindBundles(), and refineBundle().
00753 { 00754 struct dnaSeq *qSeq = bundle->qSeq; 00755 struct dnaSeq *genoSeq = bundle->genoSeq; 00756 struct ffAli *ffList = NULL; 00757 struct ssFfItem *ffl; 00758 struct ffAli *bestPath; 00759 int score; 00760 boolean firstTime = TRUE; 00761 00762 if (bundle->ffList == NULL) 00763 return; 00764 00765 /* The score may improve when we stitch together more alignments, 00766 * so don't let minScore be too harsh at this stage. */ 00767 if (minScore > 20) 00768 minScore = 20; 00769 00770 /* Create ffAlis for all in bundle and move to one big list. */ 00771 for (ffl = bundle->ffList; ffl != NULL; ffl = ffl->next) 00772 { 00773 ffCat(&ffList, &ffl->ff); 00774 } 00775 slFreeList(&bundle->ffList); 00776 00777 ffAliSort(&ffList, ffCmpHitsNeedleFirst); 00778 ffList = ffMergeClose(ffList, qSeq->dna, genoSeq->dna); 00779 00780 while (ffList != NULL) 00781 { 00782 ssFindBest(ffList, qSeq, genoSeq, stringency, 00783 bundle->isProt, bundle->t3List, 00784 &bestPath, &score, &ffList); 00785 00786 bestPath = ffMergeNeedleAlis(bestPath, TRUE); 00787 bestPath = ffRemoveEmptyAlis(bestPath, TRUE); 00788 bestPath = ffMergeHayOverlaps(bestPath); 00789 bestPath = ffRemoveEmptyAlis(bestPath, TRUE); 00790 bestPath = forceMonotonic(bestPath, qSeq, genoSeq, stringency, 00791 bundle->isProt, bundle->t3List); 00792 00793 if (firstTime && stringency == ffCdna && bundle->avoidFuzzyFindKludge == FALSE) 00794 { 00795 /* Only look for middle exons the first time. Next times 00796 * this might regenerate most of the first alignment... */ 00797 bestPath = smallMiddleExons(bestPath, bundle, stringency); 00798 } 00799 bestPath = ffMergeNeedleAlis(bestPath, TRUE); 00800 if (ffIntronMax != ffIntronMaxDefault) 00801 { 00802 bestPath = cutAtBigIntrons(bestPath, ffIntronMax, &score, stringency, 00803 &ffList); 00804 } 00805 if (!bundle->isProt) 00806 ffSlideIntrons(bestPath); 00807 bestPath = ffRemoveEmptyAlis(bestPath, TRUE); 00808 if (score >= minScore) 00809 { 00810 AllocVar(ffl); 00811 ffl->ff = bestPath; 00812 slAddHead(&bundle->ffList, ffl); 00813 } 00814 else 00815 { 00816 ffFreeAli(&bestPath); 00817 ffFreeAli(&ffList); 00818 break; 00819 } 00820 firstTime = FALSE; 00821 if (--maxToReturn <= 0) 00822 { 00823 ffFreeAli(&ffList); 00824 break; 00825 } 00826 } 00827 slReverse(&bundle->ffList); 00828 return; 00829 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void trans3Offsets | ( | struct trans3 * | t3List, | |
| AA * | startP, | |||
| AA * | endP, | |||
| int * | retStart, | |||
| int * | retEnd | |||
| ) | [static] |
Definition at line 165 of file supStitch.c.
References dnaSeq::dna, trans3::next, trans3::seq, dnaSeq::size, trans3::start, and trans3::trans.
Referenced by ssFindBestBig(), ssGraphMake(), and tripleCanFollow().
00168 { 00169 struct trans3 *t3; 00170 int frame; 00171 aaSeq *seq; 00172 00173 for (t3 = t3List; t3 != NULL; t3 = t3->next) 00174 { 00175 for (frame = 0; frame < 3; ++frame) 00176 { 00177 seq = t3->trans[frame]; 00178 if (seq->dna <= startP && startP < seq->dna + seq->size) 00179 { 00180 *retStart = 3*(startP - seq->dna)+frame + t3->start; 00181 *retEnd = 3*(endP - seq->dna)+frame + t3->start; 00182 return; 00183 } 00184 } 00185 } 00186 internalErr(); 00187 }
Here is the caller graph for this function:

| boolean tripleCanFollow | ( | struct ffAli * | a, | |
| struct ffAli * | b, | |||
| aaSeq * | qSeq, | |||
| struct trans3 * | t3List | |||
| ) |
Definition at line 316 of file supStitch.c.
References ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, and trans3Offsets().
Referenced by ssGraphMake().
00318 { 00319 int ahStart, ahEnd, bhStart, bhEnd; 00320 trans3Offsets(t3List, a->hStart, a->hEnd, &ahStart, &ahEnd); 00321 trans3Offsets(t3List, b->hStart, b->hEnd, &bhStart, &bhEnd); 00322 return (a->nStart < b->nStart && a->nEnd < b->nEnd && ahStart < bhStart && ahEnd < bhEnd); 00323 }
Here is the call graph for this function:

Here is the caller graph for this function:

char const rcsid[] = "$Id: supStitch.c,v 1.38 2007/04/20 22:43:37 kent Exp $" [static] |
Definition at line 18 of file supStitch.c.
boolean ssIsProt [static] |
enum ffStringency ssStringency [static] |
1.5.2