#include "common.h"#include "dnautil.h"#include "localmem.h"#include "errabort.h"#include "fuzzyFind.h"#include "obscure.h"Include dependency graph for fuzzyFind.c:

Go to the source code of this file.
Data Structures | |
| struct | protoGene |
Functions | |
| void | dumpFf (struct ffAli *left, DNA *needle, DNA *hay) |
| static void | ffAbort () |
| static void | ffMemInit () |
| static void | ffMemCleanup () |
| static void * | ffNeedMem (size_t size) |
| static void | makeFreqTable (DNA *dna, int dnaSize, double freq[4]) |
| static double | oligoProb (DNA *oligo, int size, double freq[4]) |
| static boolean | findImprobableOligo (DNA *needle, int needleLength, double maxProb, double freq[4], DNA **rOligo, int *rOligoLength, double *rOligoProb) |
| static boolean | hasRepeat (DNA *oligo, int oligoLen) |
| static boolean | ffFindGoodOligo (DNA *needle, int needleLength, double maxProb, double freq[4], DNA **rOligo, int *rOligoLength, double *rOligoProb) |
| static boolean | leftNextMatch (struct ffAli *ali, DNA *ns, DNA *ne, DNA *hs, DNA *he, int gapPenalty, int maxSkip) |
| static boolean | rightNextMatch (struct ffAli *ali, DNA *ns, DNA *ne, DNA *hs, DNA *he, int gapPenalty, int maxSkip) |
| static boolean | expandRight (struct ffAli *ali, DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd, int numSkips, int gapPenalty, int maxSkip) |
| static boolean | expandLeft (struct ffAli *ali, DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd, int numSkips, int gapPenalty, int maxSkip) |
| static boolean | exactFind (DNA *needle, int nSize, DNA *hay, int hSize, int *hOffset) |
| static struct ffAli * | reconsiderAlignedGaps (struct ffAli *ali) |
| static struct ffAli * | trimAlis (struct ffAli *aliList) |
| void | setFfExtendThroughN (boolean val) |
| boolean | expandThroughNRight (struct ffAli *ali, DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd) |
| boolean | expandThroughNLeft (struct ffAli *ali, DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd) |
| static struct ffAli * | expandAlis (struct ffAli *ali, DNA *nStart, DNA *nEnd, DNA *hStart, DNA *hEnd, int gapPenalty, int maxSkip) |
| DNA * | matchInMem (DNA *ns, DNA *ne, DNA *hs, DNA *he) |
| ffAli * | findAliBetween (DNA *tile, int tileSize, DNA *ns, DNA *ne, DNA *hs, DNA *he) |
| double | evalExactAli (struct ffAli *ali, DNA *ns, DNA *ne, DNA *hs, DNA *he, int numTiles, double freq[4]) |
| void | ffUnlink (struct ffAli **pList, struct ffAli *el) |
| void | unlinkAli (struct ffAli **pList, struct ffAli *ali) |
| int | cmpProtoSize (const void *va, const void *vb) |
| int | cmpProtoNeedle (const void *va, const void *vb) |
| int | cmpProtoScore (const void *va, const void *vb) |
| boolean | canAdd (struct protoGene *a, struct protoGene *b) |
| boolean | bestMerger (struct protoGene *list, enum ffStringency stringency, DNA *ns, DNA *hs, struct protoGene **retA, struct protoGene **retB) |
| void | mergeProtoGenes (struct protoGene **pList, struct protoGene *a, struct protoGene *b) |
| void | lumpProtoGenes (struct protoGene **pList, DNA *ns, DNA *hs, enum ffStringency stringency) |
| protoGene * | lumpHits (struct ffAli **pHitList, DNA *ns, DNA *hs) |
| void | thinProtoList (struct protoGene **pList, int maxToTake) |
| static struct ffAli * | weaveAli (struct ffAli *hitList, DNA *ns, DNA *ne, DNA *hs, DNA *he, double freq[4], int *rBestVal, enum ffStringency stringency) |
| static struct ffAli * | rwFindTilesBetween (DNA *ns, DNA *ne, DNA *hs, DNA *he, enum ffStringency stringency, double probMax) |
| static struct ffAli * | exactAli (DNA *ns, DNA *ne, DNA *hs, DNA *he) |
| static struct ffAli * | recursiveWeave (DNA *ns, DNA *ne, DNA *hs, DNA *he, enum ffStringency stringency, double probMax, int level, int orientation) |
| static struct ffAli * | findWovenTiles (DNA *ns, DNA *ne, DNA *hs, DNA *he, enum ffStringency stringency) |
| static struct ffAli * | findBestAli (DNA *ns, DNA *ne, DNA *hs, DNA *he, enum ffStringency stringency) |
| static struct ffAli * | saveAliToPermanentMem (struct ffAli *volatileAli) |
| ffAli * | ffFind (DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd, enum ffStringency stringency) |
| boolean | ffFindAndScore (DNA *needle, int needleSize, DNA *haystack, int haySize, enum ffStringency stringency, struct ffAli **pAli, boolean *pRcNeedle, int *pScore) |
| boolean | ffFindEitherStrandN (DNA *needle, int needleSize, DNA *haystack, int haySize, enum ffStringency stringency, struct ffAli **pAli, boolean *pRcNeedle) |
| boolean | ffFindEitherStrand (DNA *needle, DNA *haystack, enum ffStringency stringency, struct ffAli **pAli, boolean *pRcNeedle) |
Variables | |
| static char const | rcsid [] = "$Id: fuzzyFind.c,v 1.17 2006/05/09 01:05:55 markd Exp $" |
| static jmp_buf | ffRecover |
| static struct lm * | ffMemPool = NULL |
| static int | extendThroughN |
| static double | rwFreq [4] |
| static boolean | rwIsCdna |
| static boolean | rwCheckGoodEnough |
| boolean bestMerger | ( | struct protoGene * | list, | |
| enum ffStringency | stringency, | |||
| DNA * | ns, | |||
| DNA * | hs, | |||
| struct protoGene ** | retA, | |||
| struct protoGene ** | retB | |||
| ) |
Definition at line 934 of file fuzzyFind.c.
References canAdd(), FALSE, ffCdna, protoGene::hEnd, protoGene::hStart, protoGene::nEnd, protoGene::nStart, and protoGene::right.
Referenced by lumpProtoGenes().
00937 { 00938 int bestScore = -0x7fffffff; 00939 int score; 00940 struct protoGene *bestA = NULL, *bestB = NULL; 00941 struct protoGene *a, *b; 00942 boolean isCdna = (stringency == ffCdna); 00943 00944 if (list == NULL) 00945 return FALSE; 00946 00947 for (a = list; a != NULL; a = a->right) 00948 { 00949 for (b = a->right; b != NULL; b = b->right) 00950 { 00951 if (canAdd(a,b)) 00952 { 00953 int hGap = b->hStart - a->hEnd; 00954 int nGap = b->nStart - a->nEnd; 00955 if (hGap < 0) 00956 { 00957 hGap = -8*hGap; 00958 if (!isCdna || hGap < 32) 00959 hGap = (hGap*hGap); 00960 } 00961 if (nGap < 0) 00962 nGap = -8*nGap; 00963 score = -hGap - nGap*nGap; 00964 if (score > bestScore) 00965 { 00966 bestA = a; 00967 bestB = b; 00968 bestScore = score; 00969 } 00970 } 00971 } 00972 } 00973 *retA = bestA; 00974 *retB = bestB; 00975 return bestA != NULL; 00976 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 906 of file fuzzyFind.c.
References FALSE, ffAli::hEnd, protoGene::hEnd, protoGene::hits, ffAli::hStart, protoGene::hStart, ffAli::nEnd, protoGene::nEnd, ffAli::nStart, protoGene::nStart, ffAli::right, and TRUE.
Referenced by bestMerger().
00909 { 00910 struct ffAli *pa; 00911 DNA *bnEnd = b->nEnd; 00912 DNA *bnStart = b->nStart; 00913 DNA *bhEnd = b->hEnd; 00914 DNA *bhStart = b->hStart; 00915 int bSize = bnEnd - bnStart; 00916 int maxOverlap; 00917 for (pa = a->hits; pa != NULL; pa = pa->right) 00918 { 00919 int aSize = pa->nEnd - pa->nStart; 00920 DNA *start = (bnStart > pa->nStart ? bnStart : pa->nStart); 00921 DNA *end = (bnEnd < pa->nEnd ? bnEnd : pa->nEnd); 00922 maxOverlap = (aSize < bSize ? aSize : bSize) / 4; 00923 if (maxOverlap < 2) maxOverlap = 2; 00924 if (end - start >= maxOverlap) 00925 return FALSE; 00926 start = (bhStart > pa->hStart ? bhStart : pa->hStart); 00927 end = (bhEnd < pa->hEnd ? bhEnd : pa->hEnd); 00928 if (end - start >= maxOverlap) 00929 return FALSE; 00930 } 00931 return TRUE; 00932 }
Here is the caller graph for this function:

| int cmpProtoNeedle | ( | const void * | va, | |
| const void * | vb | |||
| ) |
Definition at line 890 of file fuzzyFind.c.
References protoGene::nStart.
00892 { 00893 const struct protoGene *a = *((struct protoGene **)va); 00894 const struct protoGene *b = *((struct protoGene **)vb); 00895 return (a->nStart - b->nStart); 00896 }
| int cmpProtoScore | ( | const void * | va, | |
| const void * | vb | |||
| ) |
Definition at line 898 of file fuzzyFind.c.
References protoGene::score.
Referenced by thinProtoList().
00900 { 00901 const struct protoGene *a = *((struct protoGene **)va); 00902 const struct protoGene *b = *((struct protoGene **)vb); 00903 return (b->score - a->score); 00904 }
Here is the caller graph for this function:

| int cmpProtoSize | ( | const void * | va, | |
| const void * | vb | |||
| ) |
Definition at line 879 of file fuzzyFind.c.
References protoGene::nEnd, and protoGene::nStart.
00881 { 00882 const struct protoGene *a = *((struct protoGene **)va); 00883 const struct protoGene *b = *((struct protoGene **)vb); 00884 int aSize, bSize; 00885 aSize = a->nEnd - a->nStart; 00886 bSize = b->nEnd - b->nStart; 00887 return (bSize - aSize); 00888 }
Definition at line 81 of file supStitch.c.
Referenced by dumpBuns().
00083 { 00084 struct ffAli *ff; 00085 for (ff = left; ff != NULL; ff = ff->right) 00086 { 00087 printf("(%ld - %ld)[%ld-%ld] ", (long)(ff->hStart-hay), (long)(ff->hEnd-hay), 00088 (long)(ff->nStart - needle), (long)(ff->nEnd - needle)); 00089 } 00090 printf("\n"); 00091 }
Here is the caller graph for this function:

| double evalExactAli | ( | struct ffAli * | ali, | |
| DNA * | ns, | |||
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he, | |||
| int | numTiles, | |||
| double | freq[4] | |||
| ) |
Definition at line 831 of file fuzzyFind.c.
References ffAli::nEnd, ffAli::nStart, oligoProb(), and ffAli::right.
Referenced by rwFindTilesBetween().
00833 { 00834 int haySize = he-hs; 00835 double prob = 1.0; 00836 double allPossibles = haySize*numTiles; 00837 00838 for (;ali != NULL;ali=ali->right) 00839 { 00840 double p = oligoProb(ali->nStart, ali->nEnd - ali->nStart, freq) * allPossibles; 00841 if (p < 1.0) 00842 prob *= p; 00843 } 00844 return prob; 00845 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 1236 of file fuzzyFind.c.
References exactFind(), and ffNeedMem().
Referenced by recursiveWeave().
01238 { 01239 int exactOffset; 01240 if (exactFind(ns, ne-ns, hs, he-hs, &exactOffset)) 01241 { 01242 struct ffAli *ali = ffNeedMem(sizeof(*ali)); 01243 ali->nStart = ns; 01244 ali->nEnd = ne; 01245 ali->hStart = hs + exactOffset; 01246 ali->hEnd = ali->hStart + (ne - ns); 01247 if (ali->hEnd > he) 01248 ali->hEnd = he; 01249 return ali; 01250 } 01251 else 01252 return NULL; 01253 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 498 of file fuzzyFind.c.
Referenced by exactAli().
00500 { 00501 DNA firstC = needle[0]; 00502 DNA *restOfNeedle = needle+1; 00503 int i; 00504 int endIx = hSize - nSize; 00505 int restSize = nSize-1; 00506 00507 for (i = 0; i<=endIx; ++i) 00508 { 00509 if (firstC == hay[i]) 00510 { 00511 if (memcmp(restOfNeedle, hay+i+1, restSize) == 0) 00512 { 00513 *hOffset = i; 00514 return TRUE; 00515 } 00516 } 00517 } 00518 *hOffset = -1; 00519 return FALSE; 00520 }
Here is the caller graph for this function:

| static struct ffAli* expandAlis | ( | struct ffAli * | ali, | |
| DNA * | nStart, | |||
| DNA * | nEnd, | |||
| DNA * | hStart, | |||
| DNA * | hEnd, | |||
| int | gapPenalty, | |||
| int | maxSkip | |||
| ) | [static, read] |
Definition at line 669 of file fuzzyFind.c.
References expandThroughNLeft(), expandThroughNRight(), FALSE, ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, ffAli::nStart, ffAli::right, and TRUE.
Referenced by findBestAli().
00672 { 00673 struct ffAli *a, *left, *right; 00674 DNA *ns, *ne, *hs, *he; 00675 boolean expanded = TRUE; 00676 00677 while (expanded) 00678 { 00679 expanded = FALSE; 00680 /* Loop through expanding three times. */ 00681 /* First just expand through N's that don't require an insertion/deletion. */ 00682 for (a = ali; a != NULL; a = a->right) 00683 { 00684 if ((left = a->left) != NULL) 00685 { 00686 ns = left->nEnd; 00687 hs = left->hEnd; 00688 } 00689 else 00690 { 00691 ns = nStart; 00692 hs = hStart; 00693 } 00694 if ((right = a->right) != NULL) 00695 { 00696 ne = right->nStart; 00697 he = right->hStart; 00698 } 00699 else 00700 { 00701 ne = nEnd; 00702 he = hEnd; 00703 } 00704 expanded |= expandThroughNLeft(a, ns, ne, hs, he); 00705 expanded |= expandThroughNRight(a, ns, ne, hs, he); 00706 } 00707 /* Second do other expansion that doesn't require an insertion/deletion. */ 00708 for (a = ali; a != NULL; a = a->right) 00709 { 00710 if ((left = a->left) != NULL) 00711 { 00712 ns = left->nEnd; 00713 hs = left->hEnd; 00714 } 00715 else 00716 { 00717 ns = nStart; 00718 hs = hStart; 00719 } 00720 if ((right = a->right) != NULL) 00721 { 00722 ne = right->nStart; 00723 he = right->hStart; 00724 } 00725 else 00726 { 00727 ne = nEnd; 00728 he = hEnd; 00729 } 00730 expanded |= expandLeft(a, ns, ne, hs, he, 0, gapPenalty, maxSkip); 00731 expanded |= expandRight(a, ns, ne, hs, he, 0, gapPenalty, maxSkip); 00732 } 00733 /* Finally do insertion/deletion. */ 00734 for (a = ali; a != NULL; a = a->right) 00735 { 00736 if ((left = a->left) != NULL) 00737 { 00738 ns = left->nEnd; 00739 hs = left->hEnd; 00740 } 00741 else 00742 { 00743 ns = nStart; 00744 hs = hStart; 00745 } 00746 if ((right = a->right) != NULL) 00747 { 00748 ne = right->nStart; 00749 he = right->hStart; 00750 } 00751 else 00752 { 00753 ne = nEnd; 00754 he = hEnd; 00755 } 00756 expanded |= expandLeft(a, ns, ne, hs, he, 1, gapPenalty, maxSkip); 00757 expanded |= expandRight(a, ns, ne, hs, he, 1, gapPenalty, maxSkip); 00758 } 00759 while (ali->left) 00760 ali = ali->left; 00761 } 00762 return ali; 00763 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean expandLeft | ( | struct ffAli * | ali, | |
| DNA * | needleStart, | |||
| DNA * | needleEnd, | |||
| DNA * | hayStart, | |||
| DNA * | hayEnd, | |||
| int | numSkips, | |||
| int | gapPenalty, | |||
| int | maxSkip | |||
| ) | [static] |
Definition at line 355 of file fuzzyFind.c.
References dnaScoreMatch(), expandRight(), ffNeedMem(), ffAli::hStart, ffAli::left, leftNextMatch(), ffAli::nStart, and ffAli::right.
Referenced by expandRight().
00359 { 00360 DNA *ns = ali->nStart; 00361 DNA *hs = ali->hStart; 00362 int score; 00363 DNA *oldNs = ns; 00364 00365 for (;;) 00366 { 00367 while (ns > needleStart && hs > hayStart && ns[-1] == hs[-1]) 00368 { 00369 --hs; 00370 --ns; 00371 } 00372 if (ns <= needleStart || hs <= hayStart) 00373 { 00374 ali->nStart = ns; 00375 ali->hStart = hs; 00376 return ns != oldNs; 00377 } 00378 /* Find fuzzy score to the left. */ 00379 { 00380 int windowSize = 5; 00381 int nSize = ns-needleStart; 00382 int hSize = hs-hayStart; 00383 if (windowSize > nSize) windowSize = nSize; 00384 if (windowSize > hSize) windowSize = hSize; 00385 if (windowSize > 0) 00386 score = dnaScoreMatch(ns-windowSize, hs-windowSize, windowSize); 00387 else 00388 score = -1; 00389 00390 if (score >= windowSize-2) 00391 { 00392 hs -= windowSize; 00393 ns -= windowSize; 00394 } 00395 else if (--numSkips >= 0) 00396 { 00397 struct ffAli *newAli = ffNeedMem(sizeof(*newAli)); 00398 ali->nStart = ns; 00399 ali->hStart = hs; 00400 if (ns - needleStart < 3 || 00401 !leftNextMatch(newAli, needleStart, ns, hayStart, hs, gapPenalty, maxSkip)) 00402 { 00403 return ns != oldNs; /* We did our best... */ 00404 } 00405 newAli->right = ali; 00406 newAli->left = ali->left; 00407 if (ali->left) 00408 ali->left->right = newAli; 00409 ali->left = newAli; 00410 ali = newAli; 00411 expandRight(ali, needleStart, ns, hayStart, hs, 0, gapPenalty, maxSkip); 00412 ns = ali->nStart; 00413 hs = ali->hStart; 00414 } 00415 else 00416 { 00417 ali->nStart = ns; 00418 ali->hStart = hs; 00419 return ns != oldNs; 00420 } 00421 } 00422 } 00423 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean expandRight | ( | struct ffAli * | ali, | |
| DNA * | needleStart, | |||
| DNA * | needleEnd, | |||
| DNA * | hayStart, | |||
| DNA * | hayEnd, | |||
| int | numSkips, | |||
| int | gapPenalty, | |||
| int | maxSkip | |||
| ) | [static] |
Definition at line 425 of file fuzzyFind.c.
References dnaScoreMatch(), expandLeft(), ffNeedMem(), ffAli::hEnd, ffAli::left, ffAli::nEnd, ffAli::right, and rightNextMatch().
Referenced by expandLeft().
00428 { 00429 int score; 00430 DNA *ne = ali->nEnd; 00431 DNA *he = ali->hEnd; 00432 DNA *oldNe = ne; 00433 00434 for (;;) 00435 { 00436 /* Expand as far to the right as you can keeping perfect match */ 00437 while (ne < needleEnd && he < hayEnd && *ne == *he) 00438 { 00439 ++he; 00440 ++ne; 00441 } 00442 00443 /* Check to see if have come to the end. */ 00444 if (ne >= needleEnd || he >= hayEnd) 00445 { 00446 ali->nEnd = ne; 00447 ali->hEnd = he; 00448 return oldNe != ne; 00449 } 00450 00451 /* Find fuzzy score to the right. */ 00452 { 00453 int windowSize = 5; 00454 int nSize = needleEnd-ne; 00455 int hSize = hayEnd - he; 00456 if (windowSize > nSize) windowSize = nSize; 00457 if (windowSize > hSize) windowSize = hSize; 00458 if (windowSize > 0) 00459 score = dnaScoreMatch(ne, he, windowSize); 00460 else 00461 score = -1; 00462 00463 if (score >= windowSize-2) 00464 { 00465 he += windowSize; 00466 ne += windowSize; 00467 } 00468 else if (--numSkips >= 0) 00469 { 00470 struct ffAli *newAli = ffNeedMem(sizeof(*newAli)); 00471 ali->nEnd = ne; 00472 ali->hEnd = he; 00473 if (needleEnd - ne < 3 || 00474 !rightNextMatch(newAli, ne, needleEnd, he, hayEnd, gapPenalty, maxSkip)) 00475 { 00476 return oldNe != ne; /* We did our best... */ 00477 } 00478 newAli->left = ali; 00479 newAli->right = ali->right; 00480 if (ali->right) 00481 ali->right->left = newAli; 00482 ali->right = newAli; 00483 ali = newAli; 00484 expandLeft(ali, ne, needleEnd, he, hayEnd, 0, gapPenalty, maxSkip); 00485 ne = ali->nEnd; 00486 he = ali->hEnd; 00487 } 00488 else 00489 { 00490 ali->nEnd = ne; 00491 ali->hEnd = he; 00492 return oldNe != ne; 00493 } 00494 } 00495 } 00496 }
Here is the call graph for this function:

Here is the caller graph for this function:

| boolean expandThroughNLeft | ( | struct ffAli * | ali, | |
| DNA * | needleStart, | |||
| DNA * | needleEnd, | |||
| DNA * | hayStart, | |||
| DNA * | hayEnd | |||
| ) |
Definition at line 641 of file fuzzyFind.c.
References extendThroughN, FALSE, ffAli::hStart, ffAli::nStart, and TRUE.
Referenced by expandAlis().
00644 { 00645 DNA *nStart = ali->nStart-1; 00646 DNA *hStart = ali->hStart-1; 00647 DNA n,h; 00648 boolean expanded = FALSE; 00649 while (nStart >= needleStart && hStart >= hayStart) 00650 { 00651 n = *nStart; 00652 h = *hStart; 00653 if ((n == h) || 00654 (n == 'n' && (extendThroughN || nStart - 3 < needleStart || nStart[-1] != 'n' || nStart[-2] != 'n' || nStart[-3] != 'n')) || 00655 (h == 'n' && (extendThroughN || hStart - 3 < hayStart || hStart[-1] != 'n' || hStart[-2] != 'n' || hStart[-3] != 'n'))) 00656 { 00657 nStart -= 1; 00658 hStart -= 1; 00659 expanded = TRUE; 00660 } 00661 else 00662 break; 00663 } 00664 ali->nStart = nStart + 1; 00665 ali->hStart = hStart + 1; 00666 return expanded; 00667 }
Here is the caller graph for this function:

| boolean expandThroughNRight | ( | struct ffAli * | ali, | |
| DNA * | needleStart, | |||
| DNA * | needleEnd, | |||
| DNA * | hayStart, | |||
| DNA * | hayEnd | |||
| ) |
Definition at line 611 of file fuzzyFind.c.
References extendThroughN, FALSE, ffAli::hEnd, ffAli::nEnd, and TRUE.
Referenced by expandAlis().
00614 { 00615 DNA *nEnd = ali->nEnd; 00616 DNA *hEnd = ali->hEnd; 00617 DNA n,h; 00618 boolean expanded = FALSE; 00619 while (nEnd < needleEnd && hEnd < hayEnd) 00620 { 00621 n = *nEnd; 00622 h = *hEnd; 00623 if ((n == h) || 00624 (n == 'n' && 00625 (extendThroughN || nEnd + 3 >= needleEnd || nEnd[1] != 'n' || nEnd[2] != 'n' || nEnd[3] != 'n')) || 00626 (h == 'n' && 00627 (extendThroughN || hEnd + 3 >= hayEnd || hEnd[1] != 'n' || hEnd[2] != 'n' || hEnd[3] != 'n'))) 00628 { 00629 nEnd += 1; 00630 hEnd += 1; 00631 expanded = TRUE; 00632 } 00633 else 00634 break; 00635 } 00636 ali->nEnd = nEnd; 00637 ali->hEnd = hEnd; 00638 return expanded; 00639 }
Here is the caller graph for this function:

| static void ffAbort | ( | ) | [static] |
Definition at line 77 of file fuzzyFind.c.
References ffRecover.
Referenced by ffFind(), and saveAliToPermanentMem().
00079 { 00080 longjmp(ffRecover, -1); 00081 }
Here is the caller graph for this function:

| struct ffAli* ffFind | ( | DNA * | needleStart, | |
| DNA * | needleEnd, | |||
| DNA * | hayStart, | |||
| DNA * | hayEnd, | |||
| enum ffStringency | stringency | |||
| ) | [read] |
Definition at line 1428 of file fuzzyFind.c.
References dnaUtilOpen(), ffAbort(), ffCountGoodEnds(), ffMemCleanup(), ffMemInit(), ffRecover, findBestAli(), popAbortHandler(), pushAbortHandler(), and saveAliToPermanentMem().
Referenced by alignComponents(), ffFindAndScore(), smallMiddleExons(), and ssFindBundles().
01431 { 01432 struct ffAli *bestAli; 01433 int status; 01434 01435 assert(needleStart <= needleEnd); 01436 assert(hayStart <= hayEnd); 01437 01438 ffMemInit(); 01439 dnaUtilOpen(); 01440 01441 01442 /* Set up error recovery. */ 01443 status = setjmp(ffRecover); 01444 if (status == 0) /* Always true except after long jump. */ 01445 { 01446 pushAbortHandler(ffAbort); 01447 bestAli = findBestAli(needleStart, needleEnd, hayStart, hayEnd, stringency); 01448 ffCountGoodEnds(bestAli); 01449 bestAli = saveAliToPermanentMem(bestAli); 01450 } 01451 else /* They long jumped here because of an error. */ 01452 { 01453 bestAli = NULL; 01454 } 01455 popAbortHandler(); 01456 ffMemCleanup(); 01457 return bestAli; 01458 }
Here is the call graph for this function:

Here is the caller graph for this function:

| boolean ffFindAndScore | ( | DNA * | needle, | |
| int | needleSize, | |||
| DNA * | haystack, | |||
| int | haySize, | |||
| enum ffStringency | stringency, | |||
| struct ffAli ** | pAli, | |||
| boolean * | pRcNeedle, | |||
| int * | pScore | |||
| ) |
Definition at line 1460 of file fuzzyFind.c.
References FALSE, ffFind(), ffFreeAli(), ffScore(), reverseComplement(), and TRUE.
Referenced by ffFindEitherStrandN().
01465 { 01466 int fScore, rScore; 01467 struct ffAli *fAli, *rAli; 01468 01469 /* Get forward alignment and score it. */ 01470 fAli = ffFind(needle, needle+needleSize, haystack, haystack+haySize, stringency); 01471 fScore = ffScore(fAli,stringency); 01472 01473 /* Get reverse alignment and score it. */ 01474 reverseComplement(needle, needleSize); 01475 rAli = ffFind(needle, needle+needleSize, haystack, haystack+haySize, stringency); 01476 rScore = ffScore(rAli,stringency); 01477 reverseComplement(needle, needleSize); 01478 01479 /* If no good alignment on either strand return FALSE. */ 01480 if (fAli == NULL && rAli == NULL) 01481 { 01482 *pAli = NULL; 01483 return FALSE; 01484 } 01485 01486 /* Return TRUE with best alignment. Free the other one. */ 01487 if (fScore > rScore) 01488 { 01489 *pAli = fAli; 01490 *pRcNeedle = FALSE; 01491 if (pScore != NULL) 01492 *pScore = fScore; 01493 ffFreeAli(&rAli); 01494 } 01495 else 01496 { 01497 *pAli = rAli; 01498 *pRcNeedle = TRUE; 01499 if (pScore != NULL) 01500 *pScore = rScore; 01501 ffFreeAli(&fAli); 01502 } 01503 return TRUE; 01504 }
Here is the call graph for this function:

Here is the caller graph for this function:

| boolean ffFindEitherStrand | ( | DNA * | needle, | |
| DNA * | haystack, | |||
| enum ffStringency | stringency, | |||
| struct ffAli ** | pAli, | |||
| boolean * | pRcNeedle | |||
| ) |
Definition at line 1514 of file fuzzyFind.c.
References ffFindEitherStrandN().
01519 { 01520 return ffFindEitherStrandN(needle, strlen(needle), haystack, strlen(haystack), 01521 stringency, pAli, pRcNeedle); 01522 }
Here is the call graph for this function:

| boolean ffFindEitherStrandN | ( | DNA * | needle, | |
| int | needleSize, | |||
| DNA * | haystack, | |||
| int | haySize, | |||
| enum ffStringency | stringency, | |||
| struct ffAli ** | pAli, | |||
| boolean * | pRcNeedle | |||
| ) |
Definition at line 1506 of file fuzzyFind.c.
References ffFindAndScore().
Referenced by ffFindEitherStrand().
01510 { 01511 return ffFindAndScore(needle, needleSize, haystack, haySize, stringency, pAli, pRcNeedle, NULL); 01512 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean ffFindGoodOligo | ( | DNA * | needle, | |
| int | needleLength, | |||
| double | maxProb, | |||
| double | freq[4], | |||
| DNA ** | rOligo, | |||
| int * | rOligoLength, | |||
| double * | rOligoProb | |||
| ) | [static] |
Definition at line 219 of file fuzzyFind.c.
References FALSE, findImprobableOligo(), hasRepeat(), and TRUE.
Referenced by rwFindTilesBetween().
00223 { 00224 int oligoLen; 00225 DNA *oligo; 00226 00227 /* Loop around until you get one that doesn't repeat. */ 00228 for (;;) 00229 { 00230 if (!findImprobableOligo(needle, needleLength, maxProb, freq, rOligo, rOligoLength, rOligoProb)) 00231 return FALSE; 00232 oligoLen = *rOligoLength; 00233 oligo = *rOligo; 00234 if (hasRepeat(oligo, oligoLen)) 00235 { 00236 DNA *newNeedle = oligo+oligoLen; 00237 int newSize = needleLength - (newNeedle-needle); 00238 if (newSize <= 0) 00239 return FALSE; 00240 needle = newNeedle; 00241 needleLength = newSize; 00242 } 00243 else 00244 return TRUE; 00245 } 00246 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void ffMemCleanup | ( | ) | [static] |
Definition at line 91 of file fuzzyFind.c.
References ffMemPool, and lmCleanup().
Referenced by ffFind().
Here is the call graph for this function:

Here is the caller graph for this function:

| static void ffMemInit | ( | ) | [static] |
| static void* ffNeedMem | ( | size_t | size | ) | [static] |
Definition at line 97 of file fuzzyFind.c.
References ffMemPool, and lmAlloc().
Referenced by exactAli(), expandLeft(), expandRight(), findAliBetween(), lumpHits(), and rwFindTilesBetween().
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 847 of file fuzzyFind.c.
References ffAli::left, and ffAli::right.
Referenced by mergeProtoGenes(), and unlinkAli().
00850 { 00851 struct ffAli *list = *pList; 00852 struct ffAli *right = el->right; 00853 struct ffAli *left = el->left; 00854 00855 if (el == list) /* If first element need to update list. */ 00856 *pList = right; 00857 if (right != NULL) 00858 right->left = left; 00859 if (left != NULL) 00860 left->right = right; 00861 el->left = el->right = NULL; 00862 }
Here is the caller graph for this function:

| struct ffAli* findAliBetween | ( | DNA * | tile, | |
| int | tileSize, | |||
| DNA * | ns, | |||
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he | |||
| ) | [read] |
Definition at line 783 of file fuzzyFind.c.
References ffExpandExactLeft(), ffExpandExactRight(), ffNeedMem(), and matchInMem().
00784 { 00785 DNA *match; 00786 DNA *tileEnd = tile + tileSize; 00787 int tries = 0; 00788 for (;;) 00789 { 00790 if ((match = matchInMem(tile, tileEnd, hs, he)) == NULL) 00791 return NULL; 00792 if (matchInMem(tile, tileEnd, match+1, he) == NULL) 00793 { 00794 /* Got exactly one match, whoopie! */ 00795 struct ffAli *ali = ffNeedMem(sizeof(*ali)); 00796 ali->nStart = tile; 00797 ali->nEnd = tileEnd; 00798 ali->hStart = match; 00799 ali->hEnd = match + (tileEnd-tile); 00800 ffExpandExactLeft(ali, ns, hs); 00801 ffExpandExactRight(ali, ne, he); 00802 return ali; 00803 } 00804 00805 /* Got more than one match. Expand tile to make it more specific. 00806 * In general first add base to start, then base to beginning. If 00807 * at beginning or end already are constrained. If already spanning 00808 * full needle, can't expand, so you're done. */ 00809 if (tile <= ns) 00810 { 00811 if (tileEnd >= ne) 00812 return NULL; 00813 ++tileEnd; 00814 } 00815 else if (tile >= tileEnd) 00816 { 00817 --tile; 00818 } 00819 else if (tries & 1) 00820 { 00821 ++tileEnd; 00822 } 00823 else 00824 { 00825 --tile; 00826 } 00827 ++tries; 00828 } 00829 }
Here is the call graph for this function:

| static struct ffAli* findBestAli | ( | DNA * | ns, | |
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he, | |||
| enum ffStringency | stringency | |||
| ) | [static, read] |
Definition at line 1364 of file fuzzyFind.c.
References expandAlis(), FALSE, ffCdna, ffMergeHayOverlaps(), ffMergeNeedleAlis(), ffRemoveEmptyAlis(), ffSlideIntrons(), findWovenTiles(), nextPowerOfFour(), reconsiderAlignedGaps(), and trimAlis().
Referenced by ffFind().
01365 { 01366 static int iniExpGapPen[] = {0 /* (exact) */, 4 /* cDna */, 4 /* tight */, 4 /* loose */}; 01367 static int addExpGapPen[] = {0 /* (exact) */, 3 /* cDna */, 3 /* tight */, 3 /* loose */}; 01368 static int midTileMinSize[] ={0 /*(exact) */,12 /* cDNA */, 12 /* tight */, 4 /* loose */}; 01369 01370 struct ffAli *bestAli; 01371 int matchSize; 01372 01373 matchSize = nextPowerOfFour(he-hs)+1; 01374 if (matchSize < midTileMinSize[stringency]) 01375 matchSize = midTileMinSize[stringency]; 01376 01377 bestAli = findWovenTiles(ns, ne, hs, he, stringency); 01378 if (bestAli == NULL) 01379 return NULL; 01380 01381 bestAli = ffMergeNeedleAlis(bestAli, FALSE); 01382 bestAli = expandAlis(bestAli,ns,ne,hs,he,iniExpGapPen[stringency], 1); 01383 bestAli = ffMergeNeedleAlis(bestAli, FALSE); 01384 bestAli = expandAlis(bestAli,ns,ne,hs,he,addExpGapPen[stringency], 2*matchSize); 01385 bestAli = trimAlis(bestAli); 01386 bestAli = ffMergeNeedleAlis(bestAli, FALSE); 01387 bestAli = ffMergeHayOverlaps(bestAli); 01388 bestAli = reconsiderAlignedGaps(bestAli); 01389 bestAli = ffRemoveEmptyAlis(bestAli, FALSE); 01390 bestAli = ffMergeNeedleAlis(bestAli, FALSE); 01391 if (stringency == ffCdna) 01392 ffSlideIntrons(bestAli); 01393 bestAli = ffRemoveEmptyAlis(bestAli, FALSE); 01394 return bestAli; 01395 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean findImprobableOligo | ( | DNA * | needle, | |
| int | needleLength, | |||
| double | maxProb, | |||
| double | freq[4], | |||
| DNA ** | rOligo, | |||
| int * | rOligoLength, | |||
| double * | rOligoProb | |||
| ) | [static] |
Definition at line 130 of file fuzzyFind.c.
References FALSE, ntVal, and TRUE.
Referenced by ffFindGoodOligo().
00134 { 00135 int i; 00136 double totalProb = 1.0; 00137 int base; 00138 int startIx = 0; 00139 00140 for (i=0;i<needleLength;++i) 00141 { 00142 if ((base = ntVal[(int)needle[i]]) < 0) 00143 { 00144 totalProb = 1.0; 00145 startIx = i+1; 00146 } 00147 else 00148 { 00149 if ((totalProb *= freq[base]) <= maxProb) 00150 { 00151 *rOligo = needle+startIx; 00152 *rOligoLength = i-startIx+1; 00153 *rOligoProb = totalProb; 00154 return TRUE; 00155 } 00156 } 00157 } 00158 return FALSE; 00159 }
Here is the caller graph for this function:

| static struct ffAli* findWovenTiles | ( | DNA * | ns, | |
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he, | |||
| enum ffStringency | stringency | |||
| ) | [static, read] |
Definition at line 1342 of file fuzzyFind.c.
References ffCdna, ffTight, makeFreqTable(), and recursiveWeave().
Referenced by findBestAli().
01343 { 01344 struct ffAli *bestAli; 01345 int haySize = he - hs; 01346 int needleSize = ne - ns; 01347 static double tileStrinProbMult[] = { 0.0001, 0.0005, 0.0005, 0.5, }; 01348 /* exact cDNA tight loose */ 01349 if (needleSize < 2 || haySize < 2) /* Be serious man! */ 01350 return NULL; 01351 01352 /* Set up rwVariables - essentially locals except that they don't change over the course 01353 * of the recursion. */ 01354 makeFreqTable(hs, haySize, rwFreq); 01355 rwIsCdna = (stringency == ffCdna); 01356 rwCheckGoodEnough = (stringency == ffTight || stringency == ffCdna); 01357 01358 bestAli = recursiveWeave(ns, ne, hs, he, stringency, tileStrinProbMult[stringency], 1, 0); 01359 01360 return bestAli; 01361 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean hasRepeat | ( | DNA * | oligo, | |
| int | oligoLen | |||
| ) | [static] |
Definition at line 162 of file fuzzyFind.c.
Referenced by ffFindGoodOligo().
00164 { 00165 int mod; 00166 int i; 00167 DNA b; 00168 boolean gotRepeat; 00169 int repSize; 00170 int maxRep = (oligoLen+1)/2; 00171 00172 b = oligo[0]; 00173 gotRepeat = TRUE; 00174 for (i=1; i<oligoLen; ++i) 00175 { 00176 if (oligo[i] != b) 00177 { 00178 gotRepeat = FALSE; 00179 break; 00180 } 00181 } 00182 if (gotRepeat) 00183 return TRUE; 00184 00185 gotRepeat = TRUE; 00186 for (i=2; i<oligoLen; ++i) 00187 { 00188 if (oligo[i&1] != oligo[i]) 00189 { 00190 gotRepeat = FALSE; 00191 break; 00192 } 00193 } 00194 if (gotRepeat) 00195 return TRUE; 00196 00197 for (repSize = 3; repSize <= maxRep; ++repSize) 00198 { 00199 mod = 0; 00200 gotRepeat = TRUE; 00201 for (i=repSize; i<oligoLen; ++i) 00202 { 00203 if (oligo[mod] != oligo[i]) 00204 { 00205 gotRepeat = FALSE; 00206 break; 00207 } 00208 if (++mod == repSize) 00209 mod = 0; 00210 } 00211 if (gotRepeat) 00212 return TRUE; 00213 } 00214 return gotRepeat; 00215 }
Here is the caller graph for this function:

| static boolean leftNextMatch | ( | struct ffAli * | ali, | |
| DNA * | ns, | |||
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he, | |||
| int | gapPenalty, | |||
| int | maxSkip | |||
| ) | [static] |
Definition at line 248 of file fuzzyFind.c.
References digitsBaseTwo(), FALSE, ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, and TRUE.
Referenced by expandLeft().
00252 { 00253 int haySize = he - hs; 00254 int needleSize = ne - ns; 00255 int diagSize = haySize + needleSize; 00256 int matchSize; 00257 int i; 00258 00259 /* We take care of bigger skips on the "tile" level. */ 00260 if (diagSize > maxSkip) 00261 diagSize = maxSkip; 00262 /* Scan diagonally... */ 00263 /* 00264 0 1 2 3 00265 1 2 3 4 00266 2 3 4 5 00267 3 4 5 6 00268 */ 00269 for (i=1; i<=diagSize; ++i) 00270 { 00271 int hOff = i; 00272 int nOff = 0; 00273 int hDiff = hOff - haySize; 00274 matchSize = gapPenalty + digitsBaseTwo(i); 00275 if (hDiff > 0) 00276 { 00277 nOff += hDiff; 00278 hOff -= hDiff; 00279 } 00280 for (;hOff >= 0; --hOff, ++nOff) 00281 { 00282 int needleLeft = needleSize - nOff; 00283 int hayLeft = haySize - hOff; 00284 if (matchSize > needleLeft) break; 00285 if (matchSize > hayLeft) continue; 00286 if (ne[-nOff-1] == he[-hOff-1] && memcmp(ne-nOff-matchSize, he-hOff-matchSize, matchSize) == 0) 00287 { 00288 ali->nStart = ne - nOff - matchSize; 00289 ali->nEnd = ne - nOff; 00290 ali->hStart = he - hOff - matchSize; 00291 ali->hEnd = he- hOff; 00292 return TRUE; 00293 } 00294 } 00295 } 00296 return FALSE; 00297 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 1005 of file fuzzyFind.c.
References ffMakeRightLinks(), ffNeedMem(), ffAli::hEnd, protoGene::hEnd, protoGene::hits, ffAli::hStart, protoGene::hStart, ffAli::left, protoGene::left, ffAli::nEnd, protoGene::nEnd, ffAli::nStart, protoGene::nStart, protoGene::right, ffAli::right, and unlinkAli().
01009 { 01010 struct protoGene proto; 01011 struct protoGene *retProto; 01012 struct ffAli *ali, *nextAli; 01013 int dif, lastDif; 01014 int lumpCount = 1; 01015 01016 if ((ali = *pHitList) == NULL) 01017 return NULL; 01018 01019 /* Move first hit onto our crude exon. */ 01020 nextAli = ali->right; 01021 proto.left = proto.right = NULL; 01022 unlinkAli(pHitList, ali); 01023 proto.hits = ali; 01024 proto.hStart = ali->hStart; 01025 proto.hEnd = ali->hEnd; 01026 proto.nStart = ali->nStart; 01027 proto.nEnd = ali->nEnd; 01028 lastDif = ali->hStart - ali->nStart; 01029 01030 /* Go through rest of list and put others that are close to 01031 * on the same diagonal onto this exon. */ 01032 while ((ali = nextAli) != NULL) 01033 { 01034 nextAli = ali->right; 01035 dif = ali->hStart - ali->nStart; 01036 if (lastDif - 2 <= dif && dif <= lastDif + 2) 01037 { 01038 lastDif = dif; 01039 unlinkAli(pHitList, ali); 01040 ali->left = proto.hits; 01041 proto.hits = ali; 01042 proto.hEnd = ali->hEnd; 01043 proto.nEnd = ali->nEnd; 01044 ++lumpCount; 01045 } 01046 } 01047 proto.hits = ffMakeRightLinks(proto.hits); 01048 retProto = ffNeedMem(sizeof(*retProto)); 01049 memcpy(retProto, &proto, sizeof(*retProto)); 01050 return retProto; 01051 }
Here is the call graph for this function:

| void lumpProtoGenes | ( | struct protoGene ** | pList, | |
| DNA * | ns, | |||
| DNA * | hs, | |||
| enum ffStringency | stringency | |||
| ) |
Definition at line 993 of file fuzzyFind.c.
References bestMerger(), and mergeProtoGenes().
00996 { 00997 struct protoGene *a, *b; 00998 00999 while (bestMerger(*pList, stringency, ns, hs, &a, &b)) 01000 { 01001 mergeProtoGenes(pList, a, b); 01002 } 01003 }
Here is the call graph for this function:

| static void makeFreqTable | ( | DNA * | dna, | |
| int | dnaSize, | |||
| double | freq[4] | |||
| ) | [static] |
Definition at line 103 of file fuzzyFind.c.
References dnaBaseHistogram().
Referenced by findWovenTiles().
00104 { 00105 int histo[4]; 00106 double total; 00107 int i; 00108 00109 dnaBaseHistogram(dna, dnaSize, histo); 00110 total = histo[0] + histo[1] + histo[2] + histo[3]; 00111 if (total == 0) total = 1; 00112 for (i=0; i<4; ++i) 00113 freq[i] = (double)histo[i] / total; 00114 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 767 of file fuzzyFind.c.
Referenced by findAliBetween(), and rwFindTilesBetween().
00768 { 00769 int nLen = ne - ns; 00770 DNA c = *ns++; 00771 he -= nLen; 00772 nLen -= 1; 00773 while (hs < he) 00774 { 00775 if (*hs++ == c && memcmp(ns, hs, nLen) == 0) 00776 { 00777 return hs-1; 00778 } 00779 } 00780 return NULL; 00781 }
Here is the caller graph for this function:

Definition at line 978 of file fuzzyFind.c.
References ffCat(), ffUnlink(), ffAli::hEnd, protoGene::hEnd, protoGene::hits, ffAli::hStart, protoGene::hStart, ffAli::nEnd, protoGene::nEnd, ffAli::nStart, and protoGene::nStart.
Referenced by lumpProtoGenes().
00980 { 00981 ffUnlink((struct ffAli**)pList, (struct ffAli *)b); 00982 ffCat(&a->hits, &b->hits); 00983 if (a->hStart > b->hStart) 00984 a->hStart = b->hStart; 00985 if (a->nStart > b->nStart) 00986 a->nStart = b->nStart; 00987 if (a->hEnd < b->hEnd) 00988 a->hEnd = b->hEnd; 00989 if (a->nEnd < b->nEnd) 00990 a->nEnd = b->nEnd; 00991 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static double oligoProb | ( | DNA * | oligo, | |
| int | size, | |||
| double | freq[4] | |||
| ) | [static] |
Definition at line 116 of file fuzzyFind.c.
References ntVal.
Referenced by evalExactAli().
00117 { 00118 double prob = 1.0; 00119 int i; 00120 int baseVal; 00121 00122 for (i=0; i<size; ++i) 00123 { 00124 if ((baseVal = ntVal[(int)oligo[i]]) >= 0) 00125 prob *= freq[baseVal]; 00126 } 00127 return prob; 00128 }
Here is the caller graph for this function:

Definition at line 541 of file fuzzyFind.c.
References dnaScoreMatch(), ffCdnaGapPenalty(), ffAli::hEnd, ffAli::hStart, ffAli::left, matchScore, ffAli::nEnd, ffAli::nStart, and ffAli::right.
Referenced by findBestAli().
00545 { 00546 struct ffAli *a = NULL; 00547 struct ffAli *left = NULL; 00548 00549 if (ali == NULL) 00550 return NULL; 00551 a = ali; 00552 for (;;) 00553 { 00554 int gapSize; 00555 00556 /* Advance to next ali */ 00557 left = a; 00558 a = a->right; 00559 if (a == NULL) 00560 break; 00561 gapSize = a->nStart - left->nEnd; 00562 if (gapSize == a->hStart - left->hEnd) 00563 { 00564 int gapScore; 00565 int matchScore; 00566 gapScore = -ffCdnaGapPenalty(left, a); 00567 matchScore = dnaScoreMatch(left->nEnd, left->hEnd, gapSize); 00568 if (matchScore > gapScore) 00569 { 00570 /* Make current cover left. RemoveEmpty will take 00571 * care of empty shell of left later. */ 00572 a->hStart = left->hEnd = left->hStart; 00573 a->nStart = left->nEnd = left->nStart; 00574 } 00575 } 00576 } 00577 return ali; 00578 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct ffAli* recursiveWeave | ( | DNA * | ns, | |
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he, | |||
| enum ffStringency | stringency, | |||
| double | probMax, | |||
| int | level, | |||
| int | orientation | |||
| ) | [static, read] |
Definition at line 1255 of file fuzzyFind.c.
References exactAli(), ffExact, ffIntronOrientation(), ffAli::hEnd, ffAli::left, ffAli::nEnd, ffAli::right, and rwFindTilesBetween().
Referenced by findWovenTiles().
01259 { 01260 struct ffAli *left = NULL, *right = NULL, *aliList; 01261 01262 if ((left = exactAli(ns, ne, hs, he)) != NULL) 01263 return left; 01264 if (stringency == ffExact) 01265 return NULL; 01266 01267 aliList = rwFindTilesBetween(ns, ne, hs, he, stringency, probMax); 01268 if (aliList != NULL) 01269 { 01270 DNA *lne, *rns, *lhe, *rhs; 01271 int ndif, hdif; 01272 right = aliList; 01273 if (orientation == 0) 01274 orientation = ffIntronOrientation(aliList); 01275 for (;;) 01276 { 01277 /* Figure out the end points to recurse to. */ 01278 if (left != NULL) 01279 { 01280 lne = left->nEnd; 01281 lhe = left->hEnd; 01282 } 01283 else 01284 { 01285 lne = ns; 01286 lhe = hs; 01287 } 01288 if (right != NULL) 01289 { 01290 rns = right->nStart; 01291 rhs = right->hStart; 01292 } 01293 else 01294 { 01295 rns = ne; 01296 rhs = he; 01297 } 01298 ndif = rns-lne; 01299 hdif = rhs-lhe; 01300 /* If a big enough gap left recurse. */ 01301 if (ndif >= 5 && hdif >= 5) 01302 { 01303 struct ffAli *newLeft = NULL, *newRight; 01304 01305 newLeft = recursiveWeave(lne, rns, lhe, rhs, stringency, probMax*2, level+1, 01306 orientation); 01307 if (newLeft != NULL) 01308 { 01309 /* Insert new tiles between left and right. */ 01310 01311 /* Find right end of tile set. */ 01312 newRight = newLeft; 01313 while (newRight->right != NULL) 01314 newRight = newRight->right; 01315 01316 01317 if (left != NULL) 01318 { 01319 left->right = newLeft; 01320 newLeft->left = left; 01321 } 01322 else 01323 { 01324 aliList = newLeft; 01325 } 01326 if (right != NULL) 01327 { 01328 right->left = newRight; 01329 newRight->right = right; 01330 } 01331 } 01332 } 01333 if ((left = right) == NULL) 01334 break; 01335 right = right->right; 01336 } 01337 } 01338 return aliList; 01339 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean rightNextMatch | ( | struct ffAli * | ali, | |
| DNA * | ns, | |||
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he, | |||
| int | gapPenalty, | |||
| int | maxSkip | |||
| ) | [static] |
Definition at line 300 of file fuzzyFind.c.
References digitsBaseTwo(), FALSE, ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, and TRUE.
Referenced by expandRight().
00304 { 00305 int haySize = he - hs; 00306 int needleSize = ne - ns; 00307 int diagSize = haySize + needleSize; 00308 int matchSize; 00309 int i; 00310 00311 /* We take care of bigger skips on the "tile" level. */ 00312 if (diagSize > maxSkip) 00313 diagSize = maxSkip; 00314 /* Scan diagonally... */ 00315 /* 00316 0 1 2 3 00317 1 2 3 4 00318 2 3 4 5 00319 3 4 5 6 00320 */ 00321 for (i=1; i<=diagSize; ++i) 00322 { 00323 int hOff = i; 00324 int nOff = 0; 00325 int hDiff = hOff - haySize; 00326 matchSize = gapPenalty + digitsBaseTwo(i); 00327 if (hDiff > 0) 00328 { 00329 nOff += hDiff; 00330 hOff -= hDiff; 00331 } 00332 for (;hOff >= 0; --hOff, ++nOff) 00333 { 00334 int needleLeft = needleSize - nOff; 00335 int hayLeft = haySize - hOff; 00336 if (matchSize > needleLeft) break; 00337 if (matchSize > hayLeft) continue; 00338 if (ns[nOff] == hs[hOff] && memcmp(ns+nOff, hs+hOff, matchSize) == 0) 00339 { 00340 ali->nStart = ns + nOff; 00341 ali->nEnd = ns + nOff + matchSize; 00342 ali->hStart = hs + hOff; 00343 ali->hEnd = hs + hOff + matchSize; 00344 return TRUE; 00345 } 00346 } 00347 } 00348 return FALSE; 00349 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct ffAli* rwFindTilesBetween | ( | DNA * | ns, | |
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he, | |||
| enum ffStringency | stringency, | |||
| double | probMax | |||
| ) | [static, read] |
Definition at line 1159 of file fuzzyFind.c.
References evalExactAli(), FALSE, ffExpandExactLeft(), ffExpandExactRight(), ffFindGoodOligo(), ffMakeRightLinks(), ffNeedMem(), matchInMem(), nextPowerOfFour(), ffAli::right, tileSize, and weaveAli().
Referenced by recursiveWeave().
01163 { 01164 int searchOffset; 01165 int endTileOffset; 01166 int haySize = he - hs; 01167 int needleSize = ne - ns; 01168 int numTiles = 0; 01169 int bestWeaveVal; 01170 int tileIx; 01171 struct ffAli *hitList = NULL; 01172 struct ffAli *bestAli; 01173 struct ffAli *ali; 01174 int possibleTiles; 01175 double tileProbOne; 01176 01177 possibleTiles = (haySize - nextPowerOfFour(needleSize)); 01178 if (possibleTiles < 1) possibleTiles = 1; 01179 tileProbOne = probMax/possibleTiles; 01180 01181 searchOffset = 0; 01182 endTileOffset = 0; 01183 tileIx = 0; 01184 for (;;) 01185 { 01186 DNA *tile; 01187 int tileSize; 01188 double tileProb; 01189 DNA *h = hs; 01190 01191 searchOffset = endTileOffset; 01192 if (!ffFindGoodOligo(ns+searchOffset, needleSize-searchOffset, tileProbOne, 01193 rwFreq, &tile, &tileSize, &tileProb)) 01194 { 01195 break; 01196 } 01197 /* Find all parts that match the tile. */ 01198 for (;;) 01199 { 01200 if ((h = matchInMem(tile, tile+tileSize, h, he)) == NULL) 01201 break; 01202 ali = ffNeedMem(sizeof(*ali)); 01203 ali->hStart = h; 01204 ali->hEnd = ali->hStart + tileSize; 01205 ali->nStart = tile; 01206 ali->nEnd = tile + tileSize; 01207 ali->left = hitList; 01208 hitList = ali; 01209 h += tileSize; 01210 } 01211 endTileOffset = tile + tileSize - ns; 01212 ++numTiles; 01213 } 01214 if (hitList == NULL) 01215 return NULL; 01216 01217 hitList = ffMakeRightLinks(hitList); 01218 for (ali = hitList; ali != NULL; ali = ali->right) 01219 { 01220 ffExpandExactLeft(ali, ns, hs); 01221 ffExpandExactRight(ali, ne, he); 01222 } 01223 bestAli = weaveAli(hitList, ns, ne, hs, he, rwFreq, &bestWeaveVal, stringency); 01224 if (rwCheckGoodEnough) 01225 { 01226 double prob; 01227 prob = evalExactAli(bestAli, ns, ne, hs, he, numTiles, rwFreq); 01228 if (prob > 0.1) 01229 return NULL; 01230 rwCheckGoodEnough = FALSE; 01231 } 01232 01233 return bestAli; 01234 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 1398 of file fuzzyFind.c.
References ffAbort(), ffAli::left, needMem(), ffAli::right, and slFreeList().
Referenced by ffFind().
01400 { 01401 struct ffAli *leftList = NULL; 01402 struct ffAli *newAli, *ali; 01403 struct ffAli *rightList = NULL; 01404 01405 /* Allocate memory, copy contents and set left pointer. */ 01406 for (ali = volatileAli; ali != NULL; ali = ali->right) 01407 { 01408 newAli = needMem(sizeof(*newAli)); 01409 if (newAli == NULL) 01410 { 01411 slFreeList(leftList); 01412 ffAbort(); 01413 } 01414 memcpy(newAli, ali, sizeof(*newAli)); 01415 newAli->left = leftList; 01416 leftList = newAli; 01417 } 01418 01419 /* Set right pointer. */ 01420 for (ali = leftList; ali != NULL; ali = ali->left) 01421 { 01422 ali->right = rightList; 01423 rightList = ali; 01424 } 01425 return rightList; 01426 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void setFfExtendThroughN | ( | boolean | val | ) |
Definition at line 605 of file fuzzyFind.c.
References extendThroughN.
Referenced by main().
00607 { 00608 extendThroughN = val; 00609 }
Here is the caller graph for this function:

| void thinProtoList | ( | struct protoGene ** | pList, | |
| int | maxToTake | |||
| ) |
Definition at line 1080 of file fuzzyFind.c.
References cmpProtoScore(), and slSort().
01083 { 01084 struct protoGene *newList = NULL, *pg, *next; 01085 int i; 01086 01087 slSort(pList, cmpProtoScore); 01088 for (pg=*pList, i=0; pg != NULL && i < maxToTake; pg = next, ++i) 01089 { 01090 next = pg->left; 01091 pg->left = newList; 01092 newList = pg; 01093 } 01094 *pList = newList; 01095 }
Here is the call graph for this function:

Definition at line 580 of file fuzzyFind.c.
References ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, ffAli::right, and trimCount.
Referenced by findBestAli().
00582 { 00583 struct ffAli *ali; 00584 int trimCount = 0; 00585 for (ali = aliList; ali != NULL; ali = ali->right) 00586 { 00587 while (ali->nStart[0] != ali->hStart[0] && ali->nStart < ali->nEnd) 00588 { 00589 ali->nStart += 1; 00590 ali->hStart += 1; 00591 ++trimCount; 00592 } 00593 while (ali->nEnd[-1] != ali->hEnd[-1] && ali->nEnd > ali->nStart) 00594 { 00595 ali->nEnd -= 1; 00596 ali->hEnd -= 1; 00597 ++trimCount; 00598 } 00599 } 00600 return aliList; 00601 }
Here is the caller graph for this function:

Definition at line 864 of file fuzzyFind.c.
References ffUnlink().
Referenced by lumpHits(), and weaveAli().
00866 { 00867 ffUnlink(pList, ali); 00868 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct ffAli* weaveAli | ( | struct ffAli * | hitList, | |
| DNA * | ns, | |||
| DNA * | ne, | |||
| DNA * | hs, | |||
| DNA * | he, | |||
| double | freq[4], | |||
| int * | rBestVal, | |||
| enum ffStringency | stringency | |||
| ) | [static, read] |
Definition at line 1097 of file fuzzyFind.c.
References ffAliSort(), ffCmpHitsHayFirst(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, ffAli::right, and unlinkAli().
Referenced by rwFindTilesBetween().
01101 { 01102 struct ffAli *ali, *lastAli; 01103 struct protoGene *proto, *protoList = NULL; 01104 int protoCount = 0; 01105 01106 /* Sort initial hits and remove duplicates. */ 01107 ffAliSort(&hitList, ffCmpHitsHayFirst); 01108 ali = hitList; 01109 for (;;) 01110 { 01111 lastAli = ali; 01112 if ((ali = ali->right) == NULL) 01113 break; 01114 if (ali->hStart == lastAli->hStart && ali->hEnd == lastAli->hEnd 01115 && ali->nStart == lastAli->nStart && ali->nEnd == lastAli->nEnd) 01116 { 01117 unlinkAli(&hitList, lastAli); 01118 } 01119 } 01120 01121 /* Lump together things which look to be separated only by small 01122 * amounts of noise. */ 01123 while ((proto = lumpHits(&hitList, ns, hs)) != NULL) 01124 { 01125 proto->left = protoList; 01126 protoList = proto; 01127 ++protoCount; 01128 } 01129 if (protoCount > 200) 01130 { 01131 thinProtoList(&protoList, 200); 01132 } 01133 protoList = (struct protoGene *)ffMakeRightLinks((struct ffAli*)protoList); 01134 01135 01136 /* Lump together any other ones that can, starting with ones 01137 * that go together best. */ 01138 ffAliSort((struct ffAli**)(void*)&protoList, cmpProtoNeedle); 01139 lumpProtoGenes(&protoList, ns, hs, stringency ); 01140 for (proto = protoList; proto != NULL; proto = proto->right) 01141 { 01142 ffAliSort((struct ffAli**)&proto->hits, ffCmpHitsNeedleFirst); 01143 /* removeThrowbackHits(proto); */ 01144 proto->score = ffScore(proto->hits,stringency); 01145 } 01146 01147 /* Sort by score and return the best. */ 01148 ffAliSort((struct ffAli **)(void*)&protoList, cmpProtoScore); 01149 *rBestVal = protoList->score; 01150 return protoList->hits; 01151 }
Here is the call graph for this function:

Here is the caller graph for this function:

int extendThroughN [static] |
Definition at line 603 of file fuzzyFind.c.
Referenced by expandThroughNLeft(), expandThroughNRight(), and setFfExtendThroughN().
Definition at line 83 of file fuzzyFind.c.
Referenced by ffMemCleanup(), ffMemInit(), and ffNeedMem().
jmp_buf ffRecover [static] |
char const rcsid[] = "$Id: fuzzyFind.c,v 1.17 2006/05/09 01:05:55 markd Exp $" [static] |
Definition at line 43 of file fuzzyFind.c.
boolean rwCheckGoodEnough [static] |
Definition at line 1157 of file fuzzyFind.c.
double rwFreq[4] [static] |
Definition at line 1155 of file fuzzyFind.c.
boolean rwIsCdna [static] |
Definition at line 1156 of file fuzzyFind.c.
1.5.2