jkOwnLib/fuzzyFind.c File Reference

#include "common.h"
#include "dnautil.h"
#include "localmem.h"
#include "errabort.h"
#include "fuzzyFind.h"
#include "obscure.h"

Include dependency graph for fuzzyFind.c:

Go to the source code of this file.

Data Structures

struct  protoGene

Functions

void dumpFf (struct ffAli *left, DNA *needle, DNA *hay)
static void ffAbort ()
static void ffMemInit ()
static void ffMemCleanup ()
static void * ffNeedMem (size_t size)
static void makeFreqTable (DNA *dna, int dnaSize, double freq[4])
static double oligoProb (DNA *oligo, int size, double freq[4])
static boolean findImprobableOligo (DNA *needle, int needleLength, double maxProb, double freq[4], DNA **rOligo, int *rOligoLength, double *rOligoProb)
static boolean hasRepeat (DNA *oligo, int oligoLen)
static boolean ffFindGoodOligo (DNA *needle, int needleLength, double maxProb, double freq[4], DNA **rOligo, int *rOligoLength, double *rOligoProb)
static boolean leftNextMatch (struct ffAli *ali, DNA *ns, DNA *ne, DNA *hs, DNA *he, int gapPenalty, int maxSkip)
static boolean rightNextMatch (struct ffAli *ali, DNA *ns, DNA *ne, DNA *hs, DNA *he, int gapPenalty, int maxSkip)
static boolean expandRight (struct ffAli *ali, DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd, int numSkips, int gapPenalty, int maxSkip)
static boolean expandLeft (struct ffAli *ali, DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd, int numSkips, int gapPenalty, int maxSkip)
static boolean exactFind (DNA *needle, int nSize, DNA *hay, int hSize, int *hOffset)
static struct ffAlireconsiderAlignedGaps (struct ffAli *ali)
static struct ffAlitrimAlis (struct ffAli *aliList)
void setFfExtendThroughN (boolean val)
boolean expandThroughNRight (struct ffAli *ali, DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd)
boolean expandThroughNLeft (struct ffAli *ali, DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd)
static struct ffAliexpandAlis (struct ffAli *ali, DNA *nStart, DNA *nEnd, DNA *hStart, DNA *hEnd, int gapPenalty, int maxSkip)
DNAmatchInMem (DNA *ns, DNA *ne, DNA *hs, DNA *he)
ffAlifindAliBetween (DNA *tile, int tileSize, DNA *ns, DNA *ne, DNA *hs, DNA *he)
double evalExactAli (struct ffAli *ali, DNA *ns, DNA *ne, DNA *hs, DNA *he, int numTiles, double freq[4])
void ffUnlink (struct ffAli **pList, struct ffAli *el)
void unlinkAli (struct ffAli **pList, struct ffAli *ali)
int cmpProtoSize (const void *va, const void *vb)
int cmpProtoNeedle (const void *va, const void *vb)
int cmpProtoScore (const void *va, const void *vb)
boolean canAdd (struct protoGene *a, struct protoGene *b)
boolean bestMerger (struct protoGene *list, enum ffStringency stringency, DNA *ns, DNA *hs, struct protoGene **retA, struct protoGene **retB)
void mergeProtoGenes (struct protoGene **pList, struct protoGene *a, struct protoGene *b)
void lumpProtoGenes (struct protoGene **pList, DNA *ns, DNA *hs, enum ffStringency stringency)
protoGenelumpHits (struct ffAli **pHitList, DNA *ns, DNA *hs)
void thinProtoList (struct protoGene **pList, int maxToTake)
static struct ffAliweaveAli (struct ffAli *hitList, DNA *ns, DNA *ne, DNA *hs, DNA *he, double freq[4], int *rBestVal, enum ffStringency stringency)
static struct ffAlirwFindTilesBetween (DNA *ns, DNA *ne, DNA *hs, DNA *he, enum ffStringency stringency, double probMax)
static struct ffAliexactAli (DNA *ns, DNA *ne, DNA *hs, DNA *he)
static struct ffAlirecursiveWeave (DNA *ns, DNA *ne, DNA *hs, DNA *he, enum ffStringency stringency, double probMax, int level, int orientation)
static struct ffAlifindWovenTiles (DNA *ns, DNA *ne, DNA *hs, DNA *he, enum ffStringency stringency)
static struct ffAlifindBestAli (DNA *ns, DNA *ne, DNA *hs, DNA *he, enum ffStringency stringency)
static struct ffAlisaveAliToPermanentMem (struct ffAli *volatileAli)
ffAliffFind (DNA *needleStart, DNA *needleEnd, DNA *hayStart, DNA *hayEnd, enum ffStringency stringency)
boolean ffFindAndScore (DNA *needle, int needleSize, DNA *haystack, int haySize, enum ffStringency stringency, struct ffAli **pAli, boolean *pRcNeedle, int *pScore)
boolean ffFindEitherStrandN (DNA *needle, int needleSize, DNA *haystack, int haySize, enum ffStringency stringency, struct ffAli **pAli, boolean *pRcNeedle)
boolean ffFindEitherStrand (DNA *needle, DNA *haystack, enum ffStringency stringency, struct ffAli **pAli, boolean *pRcNeedle)

Variables

static char const rcsid [] = "$Id: fuzzyFind.c,v 1.17 2006/05/09 01:05:55 markd Exp $"
static jmp_buf ffRecover
static struct lmffMemPool = NULL
static int extendThroughN
static double rwFreq [4]
static boolean rwIsCdna
static boolean rwCheckGoodEnough


Function Documentation

boolean bestMerger ( struct protoGene list,
enum ffStringency  stringency,
DNA ns,
DNA hs,
struct protoGene **  retA,
struct protoGene **  retB 
)

Definition at line 934 of file fuzzyFind.c.

References canAdd(), FALSE, ffCdna, protoGene::hEnd, protoGene::hStart, protoGene::nEnd, protoGene::nStart, and protoGene::right.

Referenced by lumpProtoGenes().

00937 {
00938 int bestScore = -0x7fffffff;
00939 int score;
00940 struct protoGene *bestA = NULL, *bestB = NULL;
00941 struct protoGene *a, *b;
00942 boolean isCdna = (stringency == ffCdna);
00943 
00944 if (list == NULL)
00945     return FALSE;
00946 
00947 for (a = list; a != NULL; a = a->right)
00948     {
00949     for (b = a->right; b != NULL; b = b->right)
00950         {
00951         if (canAdd(a,b))
00952             {
00953             int hGap = b->hStart - a->hEnd;
00954             int nGap = b->nStart - a->nEnd;
00955             if (hGap < 0)
00956                 {
00957                 hGap = -8*hGap;
00958                 if (!isCdna || hGap < 32)
00959                     hGap = (hGap*hGap);
00960                 }
00961             if (nGap < 0)
00962                 nGap = -8*nGap;
00963             score = -hGap - nGap*nGap;
00964             if (score > bestScore)
00965                 {
00966                 bestA = a;
00967                 bestB = b;
00968                 bestScore = score;
00969                 }
00970             }
00971         }
00972     }
00973 *retA = bestA;
00974 *retB = bestB;
00975 return bestA != NULL;
00976 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean canAdd ( struct protoGene a,
struct protoGene b 
)

Definition at line 906 of file fuzzyFind.c.

References FALSE, ffAli::hEnd, protoGene::hEnd, protoGene::hits, ffAli::hStart, protoGene::hStart, ffAli::nEnd, protoGene::nEnd, ffAli::nStart, protoGene::nStart, ffAli::right, and TRUE.

Referenced by bestMerger().

00909 {
00910 struct ffAli *pa;
00911 DNA *bnEnd = b->nEnd;
00912 DNA *bnStart = b->nStart;
00913 DNA *bhEnd = b->hEnd;
00914 DNA *bhStart = b->hStart;
00915 int bSize = bnEnd - bnStart;
00916 int maxOverlap;
00917 for (pa = a->hits; pa != NULL; pa = pa->right)
00918     {
00919     int aSize = pa->nEnd - pa->nStart;
00920     DNA *start = (bnStart > pa->nStart ? bnStart : pa->nStart);
00921     DNA *end = (bnEnd < pa->nEnd ? bnEnd : pa->nEnd);
00922     maxOverlap = (aSize < bSize ? aSize : bSize) / 4;
00923     if (maxOverlap < 2) maxOverlap = 2;
00924     if (end - start >= maxOverlap)
00925        return FALSE;
00926     start = (bhStart > pa->hStart ? bhStart : pa->hStart);
00927     end = (bhEnd < pa->hEnd ? bhEnd : pa->hEnd);
00928     if (end - start >= maxOverlap)
00929         return FALSE;
00930     }
00931 return TRUE;
00932 }

Here is the caller graph for this function:

int cmpProtoNeedle ( const void *  va,
const void *  vb 
)

Definition at line 890 of file fuzzyFind.c.

References protoGene::nStart.

00892 {
00893 const struct protoGene *a = *((struct protoGene **)va);
00894 const struct protoGene *b = *((struct protoGene **)vb);
00895 return (a->nStart - b->nStart);
00896 }

int cmpProtoScore ( const void *  va,
const void *  vb 
)

Definition at line 898 of file fuzzyFind.c.

References protoGene::score.

Referenced by thinProtoList().

00900 {
00901 const struct protoGene *a = *((struct protoGene **)va);
00902 const struct protoGene *b = *((struct protoGene **)vb);
00903 return (b->score - a->score);
00904 }

Here is the caller graph for this function:

int cmpProtoSize ( const void *  va,
const void *  vb 
)

Definition at line 879 of file fuzzyFind.c.

References protoGene::nEnd, and protoGene::nStart.

00881 {
00882 const struct protoGene *a = *((struct protoGene **)va);
00883 const struct protoGene *b = *((struct protoGene **)vb);
00884 int aSize, bSize;
00885 aSize = a->nEnd - a->nStart;
00886 bSize = b->nEnd - b->nStart;
00887 return (bSize - aSize);
00888 }

void dumpFf ( struct ffAli left,
DNA needle,
DNA hay 
)

Definition at line 81 of file supStitch.c.

Referenced by dumpBuns().

00083 {
00084 struct ffAli *ff;
00085 for (ff = left; ff != NULL; ff = ff->right)
00086     {
00087     printf("(%ld - %ld)[%ld-%ld] ", (long)(ff->hStart-hay), (long)(ff->hEnd-hay),
00088         (long)(ff->nStart - needle), (long)(ff->nEnd - needle));
00089     }
00090 printf("\n");
00091 }

Here is the caller graph for this function:

double evalExactAli ( struct ffAli ali,
DNA ns,
DNA ne,
DNA hs,
DNA he,
int  numTiles,
double  freq[4] 
)

Definition at line 831 of file fuzzyFind.c.

References ffAli::nEnd, ffAli::nStart, oligoProb(), and ffAli::right.

Referenced by rwFindTilesBetween().

00833 {
00834 int haySize = he-hs;
00835 double prob = 1.0;
00836 double allPossibles = haySize*numTiles;
00837 
00838 for (;ali != NULL;ali=ali->right)
00839     {
00840     double p = oligoProb(ali->nStart, ali->nEnd - ali->nStart, freq) * allPossibles;
00841     if (p < 1.0)
00842         prob *= p;
00843     }
00844 return prob;
00845 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* exactAli ( DNA ns,
DNA ne,
DNA hs,
DNA he 
) [static, read]

Definition at line 1236 of file fuzzyFind.c.

References exactFind(), and ffNeedMem().

Referenced by recursiveWeave().

01238 {
01239 int exactOffset;
01240 if (exactFind(ns, ne-ns, hs, he-hs, &exactOffset))
01241     {
01242     struct ffAli *ali = ffNeedMem(sizeof(*ali));
01243     ali->nStart = ns;
01244     ali->nEnd = ne;
01245     ali->hStart = hs + exactOffset;
01246     ali->hEnd = ali->hStart + (ne - ns);
01247     if (ali->hEnd > he)
01248         ali->hEnd = he;
01249     return ali;
01250     }
01251 else
01252     return NULL;
01253 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean exactFind ( DNA needle,
int  nSize,
DNA hay,
int  hSize,
int *  hOffset 
) [static]

Definition at line 498 of file fuzzyFind.c.

References FALSE, and TRUE.

Referenced by exactAli().

00500 {
00501 DNA firstC = needle[0];
00502 DNA *restOfNeedle = needle+1;
00503 int i;
00504 int endIx = hSize - nSize;
00505 int restSize = nSize-1;
00506 
00507 for (i = 0; i<=endIx; ++i)
00508     {
00509     if (firstC == hay[i])
00510         {
00511         if (memcmp(restOfNeedle, hay+i+1, restSize) == 0)
00512             {
00513             *hOffset = i;
00514             return TRUE;
00515             }
00516         }
00517     }
00518 *hOffset = -1;
00519 return FALSE;
00520 }

Here is the caller graph for this function:

static struct ffAli* expandAlis ( struct ffAli ali,
DNA nStart,
DNA nEnd,
DNA hStart,
DNA hEnd,
int  gapPenalty,
int  maxSkip 
) [static, read]

Definition at line 669 of file fuzzyFind.c.

References expandThroughNLeft(), expandThroughNRight(), FALSE, ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, ffAli::nStart, ffAli::right, and TRUE.

Referenced by findBestAli().

00672 {
00673 struct ffAli *a, *left, *right;
00674 DNA *ns, *ne, *hs, *he;
00675 boolean expanded = TRUE;
00676 
00677 while (expanded)
00678     {
00679     expanded = FALSE;
00680     /* Loop through expanding three times. */
00681     /* First just expand through N's that don't require an insertion/deletion. */
00682     for (a = ali; a != NULL; a = a->right)
00683         {
00684         if ((left = a->left) != NULL)
00685             {
00686             ns = left->nEnd;
00687             hs = left->hEnd;
00688             }
00689         else
00690             {
00691             ns = nStart;
00692             hs = hStart;
00693             }
00694         if ((right = a->right) != NULL)
00695             {
00696             ne = right->nStart;
00697             he = right->hStart;
00698             }
00699         else
00700             {
00701             ne = nEnd;
00702             he = hEnd;
00703             }
00704         expanded |= expandThroughNLeft(a, ns, ne, hs, he);
00705         expanded |= expandThroughNRight(a, ns, ne, hs, he);
00706         }
00707     /* Second do other expansion that doesn't require an insertion/deletion. */
00708     for (a = ali; a != NULL; a = a->right)
00709         {
00710         if ((left = a->left) != NULL)
00711             {
00712             ns = left->nEnd;
00713             hs = left->hEnd;
00714             }
00715         else
00716             {
00717             ns = nStart;
00718             hs = hStart;
00719             }
00720         if ((right = a->right) != NULL)
00721             {
00722             ne = right->nStart;
00723             he = right->hStart;
00724             }
00725         else
00726             {
00727             ne = nEnd;
00728             he = hEnd;
00729             }
00730         expanded |= expandLeft(a, ns, ne, hs, he, 0, gapPenalty, maxSkip);
00731         expanded |= expandRight(a, ns, ne, hs, he, 0, gapPenalty, maxSkip);
00732         }
00733     /* Finally do insertion/deletion. */
00734     for (a = ali; a != NULL; a = a->right)
00735         {
00736         if ((left = a->left) != NULL)
00737             {
00738             ns = left->nEnd;
00739             hs = left->hEnd;
00740             }
00741         else
00742             {
00743             ns = nStart;
00744             hs = hStart;
00745             }
00746         if ((right = a->right) != NULL)
00747             {
00748             ne = right->nStart;
00749             he = right->hStart;
00750             }
00751         else
00752             {
00753             ne = nEnd;
00754             he = hEnd;
00755             }
00756         expanded |= expandLeft(a, ns, ne, hs, he, 1, gapPenalty, maxSkip);
00757         expanded |= expandRight(a, ns, ne, hs, he, 1, gapPenalty, maxSkip);
00758         }
00759     while (ali->left)
00760         ali = ali->left;
00761     }
00762 return ali;
00763 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean expandLeft ( struct ffAli ali,
DNA needleStart,
DNA needleEnd,
DNA hayStart,
DNA hayEnd,
int  numSkips,
int  gapPenalty,
int  maxSkip 
) [static]

Definition at line 355 of file fuzzyFind.c.

References dnaScoreMatch(), expandRight(), ffNeedMem(), ffAli::hStart, ffAli::left, leftNextMatch(), ffAli::nStart, and ffAli::right.

Referenced by expandRight().

00359 {
00360 DNA *ns = ali->nStart;
00361 DNA *hs = ali->hStart;
00362 int score;
00363 DNA *oldNs = ns;
00364 
00365 for (;;)
00366     {
00367     while (ns > needleStart && hs > hayStart && ns[-1] == hs[-1])
00368         {
00369         --hs;
00370         --ns;
00371         }
00372     if (ns <= needleStart || hs <= hayStart)
00373         {
00374         ali->nStart = ns;
00375         ali->hStart = hs;
00376         return ns != oldNs;
00377         }
00378     /* Find fuzzy score to the left. */
00379         {
00380         int windowSize = 5;
00381         int nSize = ns-needleStart;
00382         int hSize = hs-hayStart;
00383         if (windowSize > nSize) windowSize = nSize;
00384         if (windowSize > hSize) windowSize = hSize;
00385         if (windowSize > 0)
00386             score = dnaScoreMatch(ns-windowSize, hs-windowSize, windowSize); 
00387         else
00388             score = -1;
00389 
00390         if (score >= windowSize-2)
00391             {
00392             hs -= windowSize;
00393             ns -= windowSize;
00394             }
00395         else if (--numSkips >= 0)
00396             {
00397             struct ffAli *newAli = ffNeedMem(sizeof(*newAli));
00398             ali->nStart = ns;
00399             ali->hStart = hs;
00400             if (ns - needleStart < 3 || 
00401                 !leftNextMatch(newAli, needleStart, ns, hayStart, hs, gapPenalty, maxSkip))
00402                 {
00403                 return ns != oldNs; /* We did our best... */
00404                 }
00405             newAli->right = ali;
00406             newAli->left = ali->left;
00407             if (ali->left)
00408                 ali->left->right = newAli;
00409             ali->left = newAli;
00410             ali = newAli;
00411             expandRight(ali, needleStart, ns, hayStart, hs, 0, gapPenalty, maxSkip);
00412             ns = ali->nStart;
00413             hs = ali->hStart;
00414             }
00415         else
00416             {
00417             ali->nStart = ns;
00418             ali->hStart = hs;
00419             return ns != oldNs;
00420             }
00421         }
00422     }
00423 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean expandRight ( struct ffAli ali,
DNA needleStart,
DNA needleEnd,
DNA hayStart,
DNA hayEnd,
int  numSkips,
int  gapPenalty,
int  maxSkip 
) [static]

Definition at line 425 of file fuzzyFind.c.

References dnaScoreMatch(), expandLeft(), ffNeedMem(), ffAli::hEnd, ffAli::left, ffAli::nEnd, ffAli::right, and rightNextMatch().

Referenced by expandLeft().

00428 {
00429 int score;
00430 DNA *ne = ali->nEnd;
00431 DNA *he = ali->hEnd;
00432 DNA *oldNe = ne;
00433 
00434 for (;;)
00435     {
00436     /* Expand as far to the right as you can keeping perfect match */
00437     while (ne < needleEnd && he < hayEnd && *ne == *he)
00438         {
00439         ++he;
00440         ++ne;
00441         }
00442 
00443     /* Check to see if have come to the end. */
00444     if (ne >= needleEnd || he >= hayEnd)
00445         {
00446         ali->nEnd = ne;
00447         ali->hEnd = he;
00448         return oldNe != ne;
00449         }
00450 
00451     /* Find fuzzy score to the right. */
00452         {
00453         int windowSize = 5;
00454         int nSize = needleEnd-ne;
00455         int hSize = hayEnd - he;
00456         if (windowSize > nSize) windowSize = nSize;
00457         if (windowSize > hSize) windowSize = hSize;
00458         if (windowSize > 0)
00459             score = dnaScoreMatch(ne, he, windowSize); 
00460         else
00461             score = -1;
00462 
00463         if (score >= windowSize-2)
00464             {
00465             he += windowSize;
00466             ne += windowSize;
00467             }
00468         else if (--numSkips >= 0)
00469             {
00470             struct ffAli *newAli = ffNeedMem(sizeof(*newAli));
00471             ali->nEnd = ne;
00472             ali->hEnd = he;
00473             if (needleEnd - ne < 3 || 
00474                 !rightNextMatch(newAli, ne, needleEnd, he, hayEnd, gapPenalty, maxSkip))
00475                 {
00476                 return oldNe != ne; /* We did our best... */
00477                 }
00478             newAli->left = ali;
00479             newAli->right = ali->right;
00480             if (ali->right)
00481                 ali->right->left = newAli;
00482             ali->right = newAli;
00483             ali = newAli;
00484             expandLeft(ali, ne, needleEnd, he, hayEnd, 0, gapPenalty, maxSkip);
00485             ne = ali->nEnd;
00486             he = ali->hEnd;
00487             }
00488         else 
00489             {
00490             ali->nEnd = ne;
00491             ali->hEnd = he;
00492             return oldNe != ne;
00493             }
00494         }
00495     }
00496 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean expandThroughNLeft ( struct ffAli ali,
DNA needleStart,
DNA needleEnd,
DNA hayStart,
DNA hayEnd 
)

Definition at line 641 of file fuzzyFind.c.

References extendThroughN, FALSE, ffAli::hStart, ffAli::nStart, and TRUE.

Referenced by expandAlis().

00644 {
00645 DNA *nStart = ali->nStart-1;
00646 DNA *hStart = ali->hStart-1;
00647 DNA n,h;
00648 boolean expanded = FALSE;
00649 while (nStart >= needleStart && hStart >= hayStart)
00650     {
00651     n = *nStart;
00652     h = *hStart;
00653     if ((n == h) ||
00654         (n == 'n' && (extendThroughN || nStart - 3 < needleStart || nStart[-1] != 'n' || nStart[-2] != 'n' || nStart[-3] != 'n')) ||
00655         (h == 'n' && (extendThroughN || hStart - 3 < hayStart || hStart[-1] != 'n' || hStart[-2] != 'n' || hStart[-3] != 'n')))
00656         {
00657         nStart -= 1;
00658         hStart -= 1;
00659         expanded = TRUE;
00660         }
00661     else
00662         break;
00663     }
00664 ali->nStart = nStart + 1;
00665 ali->hStart = hStart + 1;
00666 return expanded;
00667 }

Here is the caller graph for this function:

boolean expandThroughNRight ( struct ffAli ali,
DNA needleStart,
DNA needleEnd,
DNA hayStart,
DNA hayEnd 
)

Definition at line 611 of file fuzzyFind.c.

References extendThroughN, FALSE, ffAli::hEnd, ffAli::nEnd, and TRUE.

Referenced by expandAlis().

00614 {
00615 DNA *nEnd = ali->nEnd;
00616 DNA *hEnd = ali->hEnd;
00617 DNA n,h;
00618 boolean expanded = FALSE;
00619 while (nEnd < needleEnd && hEnd < hayEnd)
00620     {
00621     n = *nEnd;
00622     h = *hEnd;
00623     if ((n == h) || 
00624         (n == 'n' && 
00625         (extendThroughN || nEnd + 3 >= needleEnd || nEnd[1] != 'n' || nEnd[2] != 'n' || nEnd[3] != 'n')) ||
00626         (h == 'n' && 
00627         (extendThroughN || hEnd + 3 >= hayEnd || hEnd[1] != 'n' || hEnd[2] != 'n' || hEnd[3] != 'n')))
00628         {
00629         nEnd += 1;
00630         hEnd += 1;
00631         expanded = TRUE;
00632         }
00633     else
00634         break;
00635     }
00636 ali->nEnd = nEnd;
00637 ali->hEnd = hEnd;
00638 return expanded;
00639 }

Here is the caller graph for this function:

static void ffAbort (  )  [static]

Definition at line 77 of file fuzzyFind.c.

References ffRecover.

Referenced by ffFind(), and saveAliToPermanentMem().

00079 {
00080 longjmp(ffRecover, -1);
00081 }

Here is the caller graph for this function:

struct ffAli* ffFind ( DNA needleStart,
DNA needleEnd,
DNA hayStart,
DNA hayEnd,
enum ffStringency  stringency 
) [read]

Definition at line 1428 of file fuzzyFind.c.

References dnaUtilOpen(), ffAbort(), ffCountGoodEnds(), ffMemCleanup(), ffMemInit(), ffRecover, findBestAli(), popAbortHandler(), pushAbortHandler(), and saveAliToPermanentMem().

Referenced by alignComponents(), ffFindAndScore(), smallMiddleExons(), and ssFindBundles().

01431 {
01432 struct ffAli *bestAli;
01433 int status;
01434 
01435 assert(needleStart <= needleEnd);
01436 assert(hayStart <= hayEnd);
01437 
01438 ffMemInit();
01439 dnaUtilOpen();
01440 
01441 
01442 /* Set up error recovery. */
01443 status = setjmp(ffRecover);
01444 if (status == 0)    /* Always true except after long jump. */
01445     {
01446     pushAbortHandler(ffAbort);
01447     bestAli = findBestAli(needleStart, needleEnd, hayStart, hayEnd, stringency);
01448     ffCountGoodEnds(bestAli);
01449     bestAli = saveAliToPermanentMem(bestAli);
01450     }
01451 else    /* They long jumped here because of an error. */
01452     {
01453     bestAli = NULL;
01454     }
01455 popAbortHandler();
01456 ffMemCleanup();
01457 return bestAli;
01458 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean ffFindAndScore ( DNA needle,
int  needleSize,
DNA haystack,
int  haySize,
enum ffStringency  stringency,
struct ffAli **  pAli,
boolean *  pRcNeedle,
int *  pScore 
)

Definition at line 1460 of file fuzzyFind.c.

References FALSE, ffFind(), ffFreeAli(), ffScore(), reverseComplement(), and TRUE.

Referenced by ffFindEitherStrandN().

01465 {
01466 int fScore, rScore;
01467 struct ffAli *fAli, *rAli;
01468 
01469 /* Get forward alignment and score it. */
01470 fAli = ffFind(needle, needle+needleSize, haystack, haystack+haySize, stringency);
01471 fScore = ffScore(fAli,stringency);
01472 
01473 /* Get reverse alignment and score it. */
01474 reverseComplement(needle, needleSize);
01475 rAli = ffFind(needle, needle+needleSize, haystack, haystack+haySize, stringency);
01476 rScore = ffScore(rAli,stringency);
01477 reverseComplement(needle, needleSize);
01478 
01479 /* If no good alignment on either strand return FALSE. */
01480 if (fAli == NULL && rAli == NULL)
01481     {
01482     *pAli = NULL;
01483     return FALSE;
01484     }
01485 
01486 /* Return TRUE with best alignment.  Free the other one. */
01487 if (fScore > rScore)
01488     {
01489     *pAli = fAli;
01490     *pRcNeedle = FALSE;
01491     if (pScore != NULL)
01492         *pScore = fScore;
01493     ffFreeAli(&rAli);
01494     }   
01495 else
01496     {
01497     *pAli = rAli;
01498     *pRcNeedle = TRUE;
01499     if (pScore != NULL)
01500         *pScore = rScore;
01501     ffFreeAli(&fAli);
01502     }
01503 return TRUE;
01504 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean ffFindEitherStrand ( DNA needle,
DNA haystack,
enum ffStringency  stringency,
struct ffAli **  pAli,
boolean *  pRcNeedle 
)

Definition at line 1514 of file fuzzyFind.c.

References ffFindEitherStrandN().

01519 {
01520 return ffFindEitherStrandN(needle, strlen(needle), haystack, strlen(haystack),
01521 stringency, pAli, pRcNeedle);
01522 }

Here is the call graph for this function:

boolean ffFindEitherStrandN ( DNA needle,
int  needleSize,
DNA haystack,
int  haySize,
enum ffStringency  stringency,
struct ffAli **  pAli,
boolean *  pRcNeedle 
)

Definition at line 1506 of file fuzzyFind.c.

References ffFindAndScore().

Referenced by ffFindEitherStrand().

01510 {
01511 return ffFindAndScore(needle, needleSize, haystack, haySize, stringency, pAli, pRcNeedle, NULL);
01512 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean ffFindGoodOligo ( DNA needle,
int  needleLength,
double  maxProb,
double  freq[4],
DNA **  rOligo,
int *  rOligoLength,
double *  rOligoProb 
) [static]

Definition at line 219 of file fuzzyFind.c.

References FALSE, findImprobableOligo(), hasRepeat(), and TRUE.

Referenced by rwFindTilesBetween().

00223 {
00224 int oligoLen;
00225 DNA *oligo;
00226 
00227 /* Loop around until you get one that doesn't repeat. */
00228 for (;;)
00229     {
00230     if (!findImprobableOligo(needle, needleLength, maxProb, freq, rOligo, rOligoLength, rOligoProb))
00231         return FALSE;
00232     oligoLen = *rOligoLength;
00233     oligo = *rOligo;
00234     if (hasRepeat(oligo, oligoLen))
00235         {
00236         DNA *newNeedle = oligo+oligoLen;
00237         int newSize = needleLength - (newNeedle-needle);
00238         if (newSize <= 0)
00239             return FALSE;
00240         needle = newNeedle;
00241         needleLength = newSize;
00242         }
00243     else
00244         return TRUE;
00245     }
00246 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void ffMemCleanup (  )  [static]

Definition at line 91 of file fuzzyFind.c.

References ffMemPool, and lmCleanup().

Referenced by ffFind().

00093 {
00094 lmCleanup(&ffMemPool);
00095 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void ffMemInit (  )  [static]

Definition at line 85 of file fuzzyFind.c.

References ffMemPool, and lmInit().

Referenced by ffFind().

00087 {
00088 ffMemPool = lmInit(2048);
00089 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void* ffNeedMem ( size_t  size  )  [static]

Definition at line 97 of file fuzzyFind.c.

References ffMemPool, and lmAlloc().

Referenced by exactAli(), expandLeft(), expandRight(), findAliBetween(), lumpHits(), and rwFindTilesBetween().

00099 {
00100 return lmAlloc(ffMemPool, size);
00101 }

Here is the call graph for this function:

Here is the caller graph for this function:

void ffUnlink ( struct ffAli **  pList,
struct ffAli el 
)

Definition at line 847 of file fuzzyFind.c.

References ffAli::left, and ffAli::right.

Referenced by mergeProtoGenes(), and unlinkAli().

00850 {
00851 struct ffAli *list = *pList;
00852 struct ffAli *right = el->right;
00853 struct ffAli *left = el->left;
00854 
00855 if (el == list)    /* If first element need to update list. */
00856     *pList = right;
00857 if (right != NULL)
00858     right->left = left;
00859 if (left != NULL)
00860     left->right = right;
00861 el->left = el->right = NULL;
00862 }

Here is the caller graph for this function:

struct ffAli* findAliBetween ( DNA tile,
int  tileSize,
DNA ns,
DNA ne,
DNA hs,
DNA he 
) [read]

Definition at line 783 of file fuzzyFind.c.

References ffExpandExactLeft(), ffExpandExactRight(), ffNeedMem(), and matchInMem().

00784 {
00785 DNA *match;
00786 DNA *tileEnd = tile + tileSize;
00787 int tries = 0;
00788 for (;;)
00789     {
00790     if ((match = matchInMem(tile, tileEnd, hs, he)) == NULL)
00791         return NULL;
00792     if (matchInMem(tile, tileEnd, match+1, he) == NULL)
00793         {
00794         /* Got exactly one match, whoopie! */
00795         struct ffAli *ali = ffNeedMem(sizeof(*ali));
00796         ali->nStart = tile;
00797         ali->nEnd = tileEnd;
00798         ali->hStart = match;
00799         ali->hEnd = match + (tileEnd-tile);
00800         ffExpandExactLeft(ali, ns, hs);
00801         ffExpandExactRight(ali, ne, he);
00802         return ali;
00803         }
00804 
00805     /* Got more than one match.  Expand tile to make it more specific.
00806      * In general first add base to start, then base to beginning.  If
00807      * at beginning or end already are constrained.  If already spanning
00808      * full needle, can't expand, so you're done. */
00809     if (tile <= ns)
00810         {
00811         if (tileEnd >= ne)
00812             return NULL;
00813         ++tileEnd;
00814         }
00815     else if (tile >= tileEnd)
00816         {
00817         --tile;
00818         }
00819     else if (tries & 1)
00820         {
00821         ++tileEnd;
00822         }
00823     else
00824         {
00825         --tile;
00826         }    
00827     ++tries;
00828     }    
00829 }

Here is the call graph for this function:

static struct ffAli* findBestAli ( DNA ns,
DNA ne,
DNA hs,
DNA he,
enum ffStringency  stringency 
) [static, read]

Definition at line 1364 of file fuzzyFind.c.

References expandAlis(), FALSE, ffCdna, ffMergeHayOverlaps(), ffMergeNeedleAlis(), ffRemoveEmptyAlis(), ffSlideIntrons(), findWovenTiles(), nextPowerOfFour(), reconsiderAlignedGaps(), and trimAlis().

Referenced by ffFind().

01365 {
01366 static int iniExpGapPen[] = {0 /* (exact) */, 4 /* cDna */, 4 /* tight */, 4 /* loose */};
01367 static int addExpGapPen[] = {0 /* (exact) */, 3 /* cDna */, 3 /* tight */, 3 /* loose */};
01368 static int midTileMinSize[] ={0 /*(exact) */,12 /* cDNA */, 12 /* tight */, 4 /* loose */};
01369 
01370 struct ffAli *bestAli;
01371 int matchSize;
01372 
01373 matchSize = nextPowerOfFour(he-hs)+1;
01374 if (matchSize < midTileMinSize[stringency])
01375     matchSize = midTileMinSize[stringency];
01376 
01377 bestAli = findWovenTiles(ns, ne, hs, he, stringency);
01378 if (bestAli == NULL)
01379     return NULL;
01380 
01381 bestAli = ffMergeNeedleAlis(bestAli, FALSE);
01382 bestAli = expandAlis(bestAli,ns,ne,hs,he,iniExpGapPen[stringency], 1);
01383 bestAli = ffMergeNeedleAlis(bestAli, FALSE);
01384 bestAli = expandAlis(bestAli,ns,ne,hs,he,addExpGapPen[stringency], 2*matchSize);
01385 bestAli = trimAlis(bestAli);
01386 bestAli = ffMergeNeedleAlis(bestAli, FALSE);
01387 bestAli = ffMergeHayOverlaps(bestAli);
01388 bestAli = reconsiderAlignedGaps(bestAli);
01389 bestAli = ffRemoveEmptyAlis(bestAli, FALSE);
01390 bestAli = ffMergeNeedleAlis(bestAli, FALSE);
01391 if (stringency == ffCdna)
01392     ffSlideIntrons(bestAli);
01393 bestAli = ffRemoveEmptyAlis(bestAli, FALSE);
01394 return bestAli;
01395 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean findImprobableOligo ( DNA needle,
int  needleLength,
double  maxProb,
double  freq[4],
DNA **  rOligo,
int *  rOligoLength,
double *  rOligoProb 
) [static]

Definition at line 130 of file fuzzyFind.c.

References FALSE, ntVal, and TRUE.

Referenced by ffFindGoodOligo().

00134 {
00135 int i;
00136 double totalProb = 1.0;
00137 int base;
00138 int startIx = 0;
00139 
00140 for (i=0;i<needleLength;++i)
00141     {
00142     if ((base = ntVal[(int)needle[i]]) < 0)
00143         {
00144         totalProb = 1.0;
00145         startIx = i+1;
00146         }
00147     else
00148         {
00149         if ((totalProb *= freq[base]) <= maxProb)
00150             {
00151             *rOligo = needle+startIx;
00152             *rOligoLength = i-startIx+1;
00153             *rOligoProb = totalProb;
00154             return TRUE;
00155             }
00156         }
00157     }
00158 return FALSE;
00159 }

Here is the caller graph for this function:

static struct ffAli* findWovenTiles ( DNA ns,
DNA ne,
DNA hs,
DNA he,
enum ffStringency  stringency 
) [static, read]

Definition at line 1342 of file fuzzyFind.c.

References ffCdna, ffTight, makeFreqTable(), and recursiveWeave().

Referenced by findBestAli().

01343 {
01344 struct ffAli *bestAli;
01345 int haySize = he - hs;
01346 int needleSize = ne - ns;
01347 static double tileStrinProbMult[] = { 0.0001, 0.0005, 0.0005, 0.5, };
01348                                     /* exact  cDNA        tight    loose */
01349 if (needleSize < 2 || haySize < 2)  /* Be serious man! */
01350     return NULL;
01351 
01352 /* Set up rwVariables - essentially locals except that they don't change over the course
01353  * of the recursion. */
01354 makeFreqTable(hs, haySize, rwFreq);
01355 rwIsCdna = (stringency == ffCdna);
01356 rwCheckGoodEnough = (stringency == ffTight || stringency == ffCdna);
01357 
01358 bestAli = recursiveWeave(ns, ne, hs, he, stringency, tileStrinProbMult[stringency], 1, 0);
01359 
01360 return bestAli;
01361 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean hasRepeat ( DNA oligo,
int  oligoLen 
) [static]

Definition at line 162 of file fuzzyFind.c.

References FALSE, and TRUE.

Referenced by ffFindGoodOligo().

00164 {
00165 int mod;
00166 int i;
00167 DNA b;
00168 boolean gotRepeat;
00169 int repSize;
00170 int maxRep = (oligoLen+1)/2;
00171 
00172 b = oligo[0];
00173 gotRepeat = TRUE;
00174 for (i=1; i<oligoLen; ++i)
00175     {
00176     if (oligo[i] != b)
00177         {
00178         gotRepeat = FALSE;
00179         break;
00180         }
00181     }
00182 if (gotRepeat)
00183     return TRUE;
00184 
00185 gotRepeat = TRUE;
00186 for (i=2; i<oligoLen; ++i)
00187     {
00188     if (oligo[i&1] != oligo[i])
00189         {
00190         gotRepeat = FALSE;
00191         break;
00192         }
00193     }
00194 if (gotRepeat)
00195     return TRUE;
00196 
00197 for (repSize = 3; repSize <= maxRep; ++repSize)
00198     {
00199     mod = 0;
00200     gotRepeat = TRUE;
00201     for (i=repSize; i<oligoLen; ++i)
00202         {
00203         if (oligo[mod] != oligo[i])
00204             {
00205             gotRepeat = FALSE;
00206             break;
00207             }
00208         if (++mod == repSize)
00209             mod = 0;
00210         }
00211     if (gotRepeat)
00212         return TRUE;
00213     }
00214 return gotRepeat;
00215 }

Here is the caller graph for this function:

static boolean leftNextMatch ( struct ffAli ali,
DNA ns,
DNA ne,
DNA hs,
DNA he,
int  gapPenalty,
int  maxSkip 
) [static]

Definition at line 248 of file fuzzyFind.c.

References digitsBaseTwo(), FALSE, ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, and TRUE.

Referenced by expandLeft().

00252 {
00253 int haySize = he - hs;
00254 int needleSize = ne - ns;
00255 int diagSize = haySize + needleSize;
00256 int matchSize;
00257 int i;
00258 
00259 /* We take care of bigger skips on the "tile" level. */
00260 if (diagSize > maxSkip)
00261     diagSize = maxSkip;
00262 /* Scan diagonally... */
00263 /*
00264 0 1 2 3
00265 1 2 3 4
00266 2 3 4 5
00267 3 4 5 6
00268 */
00269 for (i=1; i<=diagSize; ++i)
00270     {
00271     int hOff = i;
00272     int nOff = 0;
00273     int hDiff = hOff - haySize;
00274     matchSize = gapPenalty + digitsBaseTwo(i);
00275     if (hDiff > 0)
00276         {
00277         nOff += hDiff;
00278         hOff -= hDiff;
00279         }
00280     for (;hOff >= 0; --hOff, ++nOff)
00281         {
00282         int needleLeft = needleSize - nOff;
00283         int hayLeft = haySize - hOff;
00284         if (matchSize > needleLeft) break;
00285         if (matchSize > hayLeft) continue;
00286         if (ne[-nOff-1] == he[-hOff-1] && memcmp(ne-nOff-matchSize, he-hOff-matchSize, matchSize) == 0)
00287             {
00288             ali->nStart = ne - nOff - matchSize;
00289             ali->nEnd = ne - nOff;
00290             ali->hStart = he - hOff - matchSize;
00291             ali->hEnd = he- hOff;
00292             return TRUE;
00293             }
00294         }
00295     }
00296 return FALSE;
00297 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct protoGene* lumpHits ( struct ffAli **  pHitList,
DNA ns,
DNA hs 
) [read]

Definition at line 1005 of file fuzzyFind.c.

References ffMakeRightLinks(), ffNeedMem(), ffAli::hEnd, protoGene::hEnd, protoGene::hits, ffAli::hStart, protoGene::hStart, ffAli::left, protoGene::left, ffAli::nEnd, protoGene::nEnd, ffAli::nStart, protoGene::nStart, protoGene::right, ffAli::right, and unlinkAli().

01009 {
01010 struct protoGene proto;
01011 struct protoGene *retProto;
01012 struct ffAli *ali, *nextAli;
01013 int dif, lastDif;
01014 int lumpCount = 1;
01015 
01016 if ((ali = *pHitList) == NULL)
01017     return NULL;
01018 
01019 /* Move first hit onto our crude exon. */
01020 nextAli = ali->right;
01021 proto.left = proto.right = NULL;
01022 unlinkAli(pHitList, ali);
01023 proto.hits = ali;
01024 proto.hStart = ali->hStart;
01025 proto.hEnd = ali->hEnd;
01026 proto.nStart = ali->nStart;
01027 proto.nEnd = ali->nEnd;
01028 lastDif = ali->hStart - ali->nStart;
01029 
01030 /* Go through rest of list and put others that are close to
01031  * on the same diagonal onto this exon. */
01032 while ((ali = nextAli) != NULL)
01033     {
01034     nextAli = ali->right;
01035     dif = ali->hStart - ali->nStart;
01036     if (lastDif - 2 <= dif && dif <= lastDif + 2)
01037         {
01038         lastDif = dif;
01039         unlinkAli(pHitList, ali);
01040         ali->left = proto.hits;
01041         proto.hits = ali;
01042         proto.hEnd = ali->hEnd;
01043         proto.nEnd = ali->nEnd;
01044         ++lumpCount;
01045         }
01046     }
01047 proto.hits = ffMakeRightLinks(proto.hits);
01048 retProto = ffNeedMem(sizeof(*retProto));
01049 memcpy(retProto, &proto, sizeof(*retProto));
01050 return retProto;
01051 }

Here is the call graph for this function:

void lumpProtoGenes ( struct protoGene **  pList,
DNA ns,
DNA hs,
enum ffStringency  stringency 
)

Definition at line 993 of file fuzzyFind.c.

References bestMerger(), and mergeProtoGenes().

00996 {
00997 struct protoGene *a, *b;
00998 
00999 while (bestMerger(*pList, stringency, ns, hs, &a, &b))
01000     {
01001     mergeProtoGenes(pList, a, b);
01002     }
01003 }

Here is the call graph for this function:

static void makeFreqTable ( DNA dna,
int  dnaSize,
double  freq[4] 
) [static]

Definition at line 103 of file fuzzyFind.c.

References dnaBaseHistogram().

Referenced by findWovenTiles().

00104 {
00105 int histo[4];
00106 double total;
00107 int i;
00108 
00109 dnaBaseHistogram(dna, dnaSize, histo);
00110 total = histo[0] + histo[1] + histo[2] + histo[3];
00111 if (total == 0) total = 1;
00112 for (i=0; i<4; ++i)
00113     freq[i] = (double)histo[i] / total;
00114 }

Here is the call graph for this function:

Here is the caller graph for this function:

DNA* matchInMem ( DNA ns,
DNA ne,
DNA hs,
DNA he 
)

Definition at line 767 of file fuzzyFind.c.

Referenced by findAliBetween(), and rwFindTilesBetween().

00768 {
00769 int nLen = ne - ns;
00770 DNA c = *ns++;
00771 he -= nLen;
00772 nLen -= 1;
00773 while (hs < he)
00774     {   
00775     if (*hs++ == c && memcmp(ns, hs, nLen) == 0)
00776         {
00777         return hs-1;
00778         }
00779     }
00780 return NULL;
00781 }

Here is the caller graph for this function:

void mergeProtoGenes ( struct protoGene **  pList,
struct protoGene a,
struct protoGene b 
)

Definition at line 978 of file fuzzyFind.c.

References ffCat(), ffUnlink(), ffAli::hEnd, protoGene::hEnd, protoGene::hits, ffAli::hStart, protoGene::hStart, ffAli::nEnd, protoGene::nEnd, ffAli::nStart, and protoGene::nStart.

Referenced by lumpProtoGenes().

00980 {
00981 ffUnlink((struct ffAli**)pList, (struct ffAli *)b);
00982 ffCat(&a->hits, &b->hits);
00983 if (a->hStart > b->hStart)
00984     a->hStart = b->hStart;
00985 if (a->nStart > b->nStart)
00986     a->nStart = b->nStart;
00987 if (a->hEnd < b->hEnd)
00988     a->hEnd = b->hEnd;
00989 if (a->nEnd < b->nEnd)
00990     a->nEnd = b->nEnd;
00991 }

Here is the call graph for this function:

Here is the caller graph for this function:

static double oligoProb ( DNA oligo,
int  size,
double  freq[4] 
) [static]

Definition at line 116 of file fuzzyFind.c.

References ntVal.

Referenced by evalExactAli().

00117 {
00118 double prob = 1.0;
00119 int i;
00120 int baseVal;
00121 
00122 for (i=0; i<size; ++i)
00123     {
00124     if ((baseVal = ntVal[(int)oligo[i]]) >= 0)
00125         prob *= freq[baseVal];
00126     }
00127 return prob;
00128 }

Here is the caller graph for this function:

static struct ffAli* reconsiderAlignedGaps ( struct ffAli ali  )  [static, read]

Definition at line 541 of file fuzzyFind.c.

References dnaScoreMatch(), ffCdnaGapPenalty(), ffAli::hEnd, ffAli::hStart, ffAli::left, matchScore, ffAli::nEnd, ffAli::nStart, and ffAli::right.

Referenced by findBestAli().

00545 {
00546 struct ffAli *a = NULL;
00547 struct ffAli *left = NULL;
00548 
00549 if (ali == NULL)
00550     return NULL;
00551 a = ali;
00552 for (;;)
00553     {
00554     int gapSize;
00555 
00556     /* Advance to next ali */
00557     left = a;
00558     a = a->right;
00559     if (a == NULL)
00560         break;
00561     gapSize = a->nStart - left->nEnd;
00562     if (gapSize == a->hStart - left->hEnd)
00563         {
00564         int gapScore;
00565         int matchScore;
00566         gapScore = -ffCdnaGapPenalty(left, a);
00567         matchScore = dnaScoreMatch(left->nEnd, left->hEnd, gapSize);
00568         if (matchScore > gapScore)
00569             {
00570             /* Make current cover left. RemoveEmpty will take
00571              * care of empty shell of left later. */
00572             a->hStart = left->hEnd = left->hStart;
00573             a->nStart = left->nEnd = left->nStart;
00574             }
00575         }
00576     }
00577 return ali;
00578 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* recursiveWeave ( DNA ns,
DNA ne,
DNA hs,
DNA he,
enum ffStringency  stringency,
double  probMax,
int  level,
int  orientation 
) [static, read]

Definition at line 1255 of file fuzzyFind.c.

References exactAli(), ffExact, ffIntronOrientation(), ffAli::hEnd, ffAli::left, ffAli::nEnd, ffAli::right, and rwFindTilesBetween().

Referenced by findWovenTiles().

01259 {
01260 struct ffAli *left = NULL, *right = NULL, *aliList;
01261 
01262 if ((left = exactAli(ns, ne, hs, he)) != NULL)
01263     return left;
01264 if (stringency == ffExact)
01265     return NULL;
01266 
01267 aliList = rwFindTilesBetween(ns, ne, hs, he, stringency, probMax);
01268 if (aliList != NULL)
01269     {
01270     DNA *lne, *rns, *lhe, *rhs;
01271     int ndif, hdif;
01272     right = aliList;
01273     if (orientation == 0)
01274         orientation = ffIntronOrientation(aliList);
01275     for (;;)
01276         {
01277         /* Figure out the end points to recurse to. */
01278         if (left != NULL)
01279             {
01280             lne = left->nEnd;
01281             lhe = left->hEnd;
01282             }
01283         else
01284             {
01285             lne = ns;
01286             lhe = hs;
01287             }
01288         if (right != NULL)
01289             {
01290             rns = right->nStart;
01291             rhs = right->hStart;
01292             }
01293         else
01294             {
01295             rns = ne;
01296             rhs = he;
01297             }
01298         ndif = rns-lne;
01299         hdif = rhs-lhe;
01300         /* If a big enough gap left recurse. */
01301         if (ndif >= 5 && hdif >= 5)
01302             {
01303             struct ffAli *newLeft = NULL, *newRight;
01304 
01305             newLeft = recursiveWeave(lne, rns, lhe, rhs, stringency, probMax*2, level+1, 
01306                 orientation);
01307             if (newLeft != NULL)
01308                 {
01309                 /* Insert new tiles between left and right. */
01310                 
01311                 /* Find right end of tile set. */
01312                 newRight = newLeft;
01313                 while (newRight->right != NULL)
01314                     newRight = newRight->right;
01315                 
01316                 
01317                 if (left != NULL)
01318                     {
01319                     left->right = newLeft;
01320                     newLeft->left = left;
01321                     }
01322                 else
01323                     {
01324                     aliList = newLeft;
01325                     }
01326                 if (right != NULL)
01327                     {
01328                     right->left = newRight;
01329                     newRight->right = right;
01330                     }
01331                 }
01332             }
01333         if ((left = right) == NULL)
01334             break;
01335         right = right->right;
01336         }
01337     }
01338 return aliList;
01339 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean rightNextMatch ( struct ffAli ali,
DNA ns,
DNA ne,
DNA hs,
DNA he,
int  gapPenalty,
int  maxSkip 
) [static]

Definition at line 300 of file fuzzyFind.c.

References digitsBaseTwo(), FALSE, ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, and TRUE.

Referenced by expandRight().

00304 {
00305 int haySize = he - hs;
00306 int needleSize = ne - ns;
00307 int diagSize = haySize + needleSize;
00308 int matchSize;
00309 int i;
00310 
00311 /* We take care of bigger skips on the "tile" level. */
00312 if (diagSize > maxSkip)
00313     diagSize = maxSkip;
00314 /* Scan diagonally... */
00315 /*
00316 0 1 2 3
00317 1 2 3 4
00318 2 3 4 5
00319 3 4 5 6
00320 */
00321 for (i=1; i<=diagSize; ++i)
00322     {
00323     int hOff = i;
00324     int nOff = 0;
00325     int hDiff = hOff - haySize;
00326     matchSize = gapPenalty + digitsBaseTwo(i);
00327     if (hDiff > 0)
00328         {
00329         nOff += hDiff;
00330         hOff -= hDiff;
00331         }
00332     for (;hOff >= 0; --hOff, ++nOff)
00333         {
00334         int needleLeft = needleSize - nOff;
00335         int hayLeft = haySize - hOff;
00336         if (matchSize > needleLeft) break;
00337         if (matchSize > hayLeft) continue;
00338         if (ns[nOff] == hs[hOff] && memcmp(ns+nOff, hs+hOff, matchSize) == 0)
00339             {
00340             ali->nStart = ns + nOff;
00341             ali->nEnd = ns + nOff + matchSize;
00342             ali->hStart = hs + hOff;
00343             ali->hEnd = hs + hOff + matchSize;
00344             return TRUE;
00345             }
00346         }
00347     }
00348 return FALSE;
00349 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* rwFindTilesBetween ( DNA ns,
DNA ne,
DNA hs,
DNA he,
enum ffStringency  stringency,
double  probMax 
) [static, read]

Definition at line 1159 of file fuzzyFind.c.

References evalExactAli(), FALSE, ffExpandExactLeft(), ffExpandExactRight(), ffFindGoodOligo(), ffMakeRightLinks(), ffNeedMem(), matchInMem(), nextPowerOfFour(), ffAli::right, tileSize, and weaveAli().

Referenced by recursiveWeave().

01163 {
01164 int searchOffset;
01165 int endTileOffset;
01166 int haySize = he - hs;
01167 int needleSize = ne - ns;
01168 int numTiles = 0;
01169 int bestWeaveVal;
01170 int tileIx;
01171 struct ffAli *hitList = NULL;
01172 struct ffAli *bestAli;
01173 struct ffAli *ali;
01174 int possibleTiles;
01175 double tileProbOne;
01176 
01177 possibleTiles = (haySize - nextPowerOfFour(needleSize));
01178 if (possibleTiles < 1) possibleTiles = 1;
01179 tileProbOne = probMax/possibleTiles;
01180 
01181 searchOffset = 0;
01182 endTileOffset = 0;
01183 tileIx = 0;
01184 for (;;)
01185     {
01186     DNA *tile;
01187     int tileSize;
01188     double tileProb;
01189     DNA *h = hs;
01190 
01191     searchOffset = endTileOffset;
01192     if (!ffFindGoodOligo(ns+searchOffset, needleSize-searchOffset, tileProbOne,
01193         rwFreq, &tile, &tileSize, &tileProb))
01194         {
01195         break;
01196         }
01197     /* Find all parts that match the tile. */
01198     for (;;)
01199         {
01200         if ((h = matchInMem(tile, tile+tileSize, h, he)) == NULL)
01201             break;
01202         ali = ffNeedMem(sizeof(*ali));
01203         ali->hStart = h;
01204         ali->hEnd = ali->hStart + tileSize;
01205         ali->nStart = tile;
01206         ali->nEnd = tile + tileSize;
01207         ali->left = hitList;
01208         hitList = ali;
01209         h += tileSize;
01210         } 
01211     endTileOffset = tile + tileSize - ns;
01212     ++numTiles;
01213     }
01214 if (hitList == NULL)
01215     return NULL;
01216 
01217 hitList = ffMakeRightLinks(hitList);
01218 for (ali = hitList; ali != NULL; ali = ali->right)
01219     {
01220     ffExpandExactLeft(ali, ns, hs);
01221     ffExpandExactRight(ali, ne, he);
01222     }
01223 bestAli = weaveAli(hitList, ns, ne, hs, he, rwFreq, &bestWeaveVal, stringency);
01224 if (rwCheckGoodEnough)
01225     {
01226     double prob;
01227     prob = evalExactAli(bestAli, ns, ne, hs, he, numTiles, rwFreq);
01228     if (prob > 0.1)
01229         return NULL;
01230     rwCheckGoodEnough = FALSE;
01231     }
01232 
01233 return bestAli;
01234 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* saveAliToPermanentMem ( struct ffAli volatileAli  )  [static, read]

Definition at line 1398 of file fuzzyFind.c.

References ffAbort(), ffAli::left, needMem(), ffAli::right, and slFreeList().

Referenced by ffFind().

01400 {
01401 struct ffAli *leftList = NULL;
01402 struct ffAli *newAli, *ali;
01403 struct ffAli *rightList = NULL;
01404 
01405 /* Allocate memory, copy contents and set left pointer. */
01406 for (ali = volatileAli; ali != NULL; ali = ali->right)
01407     {
01408     newAli = needMem(sizeof(*newAli));
01409     if (newAli == NULL)
01410         {
01411         slFreeList(leftList);
01412         ffAbort();
01413         }
01414     memcpy(newAli, ali, sizeof(*newAli));
01415     newAli->left = leftList;
01416     leftList = newAli;
01417     }
01418 
01419 /* Set right pointer. */
01420 for (ali = leftList; ali != NULL; ali = ali->left)
01421     {
01422     ali->right = rightList;
01423     rightList = ali;
01424     }
01425 return rightList;
01426 }

Here is the call graph for this function:

Here is the caller graph for this function:

void setFfExtendThroughN ( boolean  val  ) 

Definition at line 605 of file fuzzyFind.c.

References extendThroughN.

Referenced by main().

00607 {
00608 extendThroughN = val;
00609 }

Here is the caller graph for this function:

void thinProtoList ( struct protoGene **  pList,
int  maxToTake 
)

Definition at line 1080 of file fuzzyFind.c.

References cmpProtoScore(), and slSort().

01083 {
01084 struct protoGene *newList = NULL, *pg, *next;
01085 int i;
01086 
01087 slSort(pList, cmpProtoScore);
01088 for (pg=*pList, i=0; pg != NULL && i < maxToTake; pg = next, ++i)
01089     {
01090     next = pg->left;
01091     pg->left = newList;
01092     newList = pg;
01093     }
01094 *pList = newList;
01095 }

Here is the call graph for this function:

static struct ffAli* trimAlis ( struct ffAli aliList  )  [static, read]

Definition at line 580 of file fuzzyFind.c.

References ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, ffAli::right, and trimCount.

Referenced by findBestAli().

00582 {
00583 struct ffAli *ali;
00584 int trimCount = 0;
00585 for (ali = aliList; ali != NULL; ali = ali->right)
00586     {
00587     while (ali->nStart[0] != ali->hStart[0] && ali->nStart < ali->nEnd)
00588         {
00589         ali->nStart += 1;
00590         ali->hStart += 1;
00591         ++trimCount;
00592         }
00593     while (ali->nEnd[-1] != ali->hEnd[-1] && ali->nEnd > ali->nStart)
00594         {
00595         ali->nEnd -= 1;
00596         ali->hEnd -= 1;
00597         ++trimCount;
00598         }
00599     }
00600 return aliList;
00601 }

Here is the caller graph for this function:

void unlinkAli ( struct ffAli **  pList,
struct ffAli ali 
)

Definition at line 864 of file fuzzyFind.c.

References ffUnlink().

Referenced by lumpHits(), and weaveAli().

00866 {
00867 ffUnlink(pList, ali);
00868 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* weaveAli ( struct ffAli hitList,
DNA ns,
DNA ne,
DNA hs,
DNA he,
double  freq[4],
int *  rBestVal,
enum ffStringency  stringency 
) [static, read]

Definition at line 1097 of file fuzzyFind.c.

References ffAliSort(), ffCmpHitsHayFirst(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, ffAli::right, and unlinkAli().

Referenced by rwFindTilesBetween().

01101 {
01102 struct ffAli *ali, *lastAli;
01103 struct protoGene *proto, *protoList = NULL;
01104 int protoCount = 0;
01105 
01106 /* Sort initial hits and remove duplicates. */
01107 ffAliSort(&hitList, ffCmpHitsHayFirst);
01108 ali = hitList;
01109 for (;;)
01110     {
01111     lastAli = ali;
01112     if ((ali = ali->right) == NULL)
01113         break;
01114     if (ali->hStart == lastAli->hStart && ali->hEnd == lastAli->hEnd
01115        && ali->nStart == lastAli->nStart && ali->nEnd == lastAli->nEnd)
01116        {
01117        unlinkAli(&hitList, lastAli);
01118        }
01119     }
01120 
01121 /* Lump together things which look to be separated only by small
01122  * amounts of noise. */
01123 while ((proto = lumpHits(&hitList, ns, hs)) != NULL)
01124     {
01125     proto->left = protoList;
01126     protoList = proto;
01127     ++protoCount;
01128     }
01129 if (protoCount > 200)
01130     {
01131     thinProtoList(&protoList, 200);
01132     }
01133 protoList = (struct protoGene *)ffMakeRightLinks((struct ffAli*)protoList);
01134 
01135 
01136 /* Lump together any other ones that can, starting with ones
01137  * that go together best. */
01138 ffAliSort((struct ffAli**)(void*)&protoList, cmpProtoNeedle);
01139 lumpProtoGenes(&protoList, ns, hs, stringency );
01140 for (proto = protoList; proto != NULL; proto = proto->right)
01141     {
01142     ffAliSort((struct ffAli**)&proto->hits, ffCmpHitsNeedleFirst);
01143 /*    removeThrowbackHits(proto); */
01144     proto->score = ffScore(proto->hits,stringency);
01145     }
01146 
01147 /* Sort by score and return the best. */
01148 ffAliSort((struct ffAli **)(void*)&protoList, cmpProtoScore);
01149 *rBestVal = protoList->score;
01150 return protoList->hits;
01151 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

int extendThroughN [static]

Definition at line 603 of file fuzzyFind.c.

Referenced by expandThroughNLeft(), expandThroughNRight(), and setFfExtendThroughN().

struct lm* ffMemPool = NULL [static]

Definition at line 83 of file fuzzyFind.c.

Referenced by ffMemCleanup(), ffMemInit(), and ffNeedMem().

jmp_buf ffRecover [static]

Definition at line 75 of file fuzzyFind.c.

Referenced by ffAbort(), and ffFind().

char const rcsid[] = "$Id: fuzzyFind.c,v 1.17 2006/05/09 01:05:55 markd Exp $" [static]

Definition at line 43 of file fuzzyFind.c.

boolean rwCheckGoodEnough [static]

Definition at line 1157 of file fuzzyFind.c.

double rwFreq[4] [static]

Definition at line 1155 of file fuzzyFind.c.

boolean rwIsCdna [static]

Definition at line 1156 of file fuzzyFind.c.


Generated on Tue Dec 25 19:24:14 2007 for blat by  doxygen 1.5.2