jkOwnLib/ffSeedExtend.c File Reference

#include "common.h"
#include "dnaseq.h"
#include "localmem.h"
#include "memalloc.h"
#include "bits.h"
#include "genoFind.h"
#include "fuzzyFind.h"
#include "supStitch.h"
#include "bandExt.h"
#include "gfInternal.h"

Include dependency graph for ffSeedExtend.c:

Go to the source code of this file.

Data Structures

struct  seqHashEl

Functions

static void extendExactRight (int qMax, int tMax, char **pEndQ, char **pEndT)
static void extendExactLeft (int qMax, int tMax, char **pStartQ, char **pStartT)
static void extendGaplessRight (int qMax, int tMax, int maxDrop, char **pEndQ, char **pEndT)
static void extendGaplessLeft (int qMax, int tMax, int maxDrop, char **pStartQ, char **pStartT)
static int ffHashFuncN (char *s, int seedSize)
static boolean totalDegenerateN (char *s, int seedSize)
static struct ffAliffFindExtendNmers (char *nStart, char *nEnd, char *hStart, char *hEnd, int seedSize)
static void clumpToExactRange (struct gfClump *clump, bioSeq *qSeq, int tileSize, int frame, struct trans3 *t3, struct gfRange **pRangeList)
static void addExtraHits (struct gfHit *hitList, int hitSize, struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli **pExtraList)
static struct ffAlifoldInExtras (struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList, struct ffAli *extraList)
static struct ffAliscanIndexForSmallExons (struct genoFind *gf, struct gfSeqSource *target, struct dnaSeq *qSeq, Bits *qMaskBits, int qMaskOffset, struct dnaSeq *tSeq, struct lm *lm, struct ffAli *ffList)
static void bandExtBefore (struct axtScoreScheme *ss, struct ffAli *ff, int qGap, int tGap, struct ffAli **pExtraList)
static void bandExtAfter (struct axtScoreScheme *ss, struct ffAli *ff, int qGap, int tGap, struct ffAli **pExtraList)
static struct ffAlibandedExtend (struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList)
static struct ffAliexpandGapless (struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList)
static int seedResolvePower (int seedSize, int resolveLimit)
static char * scanExactLeft (char *n, int nSize, int hSize, char *hEnd, int resolveLimit)
static char * scanExactRight (char *n, int nSize, int hSize, char *hStart, int resolveLimit)
static struct ffAlifillInExact (char *nStart, char *nEnd, char *hStart, char *hEnd, boolean isRc, boolean scanLeft, boolean scanRight, int resolveLimit)
static struct ffAlifindFromSmallerSeeds (char *nStart, char *nEnd, char *hStart, char *hEnd, boolean isRc, boolean scanLeft, boolean scanRight, int seedSize, int resolveLimit)
static int countT (char *s, int size)
static int countA (char *s, int size)
ffAliscanTinyOne (char *nStart, char *nEnd, char *hStart, char *hEnd, boolean isRc, boolean scanLeft, boolean scanRight, int seedSize)
static struct ffAliscanForSmallerExons (int seedSize, struct dnaSeq *qSeq, struct dnaSeq *tSeq, boolean isRc, struct ffAli *ffList)
static struct ffAliscanForTinyInternal (struct dnaSeq *qSeq, struct dnaSeq *tSeq, boolean isRc, struct ffAli *ffList)
static boolean tradeMismatchToCloseSpliceGap (struct ffAli *left, struct ffAli *right, int orientation)
static int calcSpliceScore (struct axtScoreScheme *ss, char a1, char a2, char b1, char b2, int orientation)
static void grabAroundIntron (char *hpStart, int iPos, int iSize, int modPeelSize, char *hSeq)
static struct ffAlihardRefineSplice (struct ffAli *left, struct ffAli *right, struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList, int orientation)
static struct ffAlirefineSpliceSites (struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList)
static boolean smoothOneGap (struct ffAli *left, struct ffAli *right, struct ffAli *ffList)
static struct ffAlismoothSmallGaps (struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList)
int aPenalty (char *s, int size)
static int trimGapPenalty (int hGap, int nGap, char *iStart, char *iEnd, int orientation)
static struct ffAlitrimFlakyEnds (struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList)
static void refineBundle (struct genoFind *gf, struct dnaSeq *qSeq, Bits *qMaskBits, int qMaskOffset, struct dnaSeq *tSeq, struct lm *lm, struct ssBundle *bun, boolean isRc)
ssBundleffSeedExtInMem (struct genoFind *gf, struct dnaSeq *qSeq, Bits *qMaskBits, int qOffset, struct lm *lm, int minScore, boolean isRc)

Variables

static char const rcsid [] = "$Id: ffSeedExtend.c,v 1.36 2006/06/22 16:24:43 kent Exp $"


Function Documentation

static void addExtraHits ( struct gfHit hitList,
int  hitSize,
struct dnaSeq qSeq,
struct dnaSeq tSeq,
struct ffAli **  pExtraList 
) [static]

Definition at line 292 of file ffSeedExtend.c.

References AllocVar, dnaSeq::dna, ffExpandExactLeft(), ffExpandExactRight(), ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, gfHit::next, ffAli::nStart, gfHit::qStart, dnaSeq::size, and gfHit::tStart.

Referenced by scanIndexForSmallExons().

00295 {
00296 struct gfHit *hit;
00297 struct ffAli *ff;
00298 char *qs = qSeq->dna, *ts = tSeq->dna;
00299 char *qe = qs + qSeq->size,  *te = ts + tSeq->size;
00300 for (hit = hitList; hit != NULL; hit = hit->next)
00301     {
00302     AllocVar(ff);
00303     ff->nStart = ff->nEnd = qs + hit->qStart;
00304     ff->hStart = ff->hEnd = ts + hit->tStart;
00305     ff->nEnd += hitSize;
00306     ff->hEnd += hitSize;
00307     ff->left = *pExtraList;
00308     ffExpandExactLeft(ff, qs, ts);
00309     ffExpandExactRight(ff, qe, te);
00310     *pExtraList = ff;
00311     }
00312 }

Here is the call graph for this function:

Here is the caller graph for this function:

int aPenalty ( char *  s,
int  size 
)

Definition at line 1116 of file ffSeedExtend.c.

Referenced by trimFlakyEnds().

01118 {
01119 int aCount = 0, tCount = 0;
01120 int i;
01121 char c;
01122 for (i=0; i<size; ++i)
01123     {
01124     c = s[i];
01125     if (c == 'a') ++aCount;
01126     if (c == 't') ++tCount;
01127     }
01128 if (tCount > aCount) aCount = tCount;
01129 if (aCount >= size)
01130     return aCount-1;
01131 else if (aCount >= size*0.75)
01132     return aCount * 0.90;
01133 else
01134     return 0;
01135 }

Here is the caller graph for this function:

static struct ffAli* bandedExtend ( struct dnaSeq qSeq,
struct dnaSeq tSeq,
struct ffAli ffList 
) [static, read]

Definition at line 462 of file ffSeedExtend.c.

References axtScoreSchemeRnaDefault(), bandExtAfter(), bandExtBefore(), dnaSeq::dna, foldInExtras(), dnaSeq::size, and ss.

Referenced by refineBundle().

00464 {
00465 struct ffAli *extraList = NULL, *ff = ffList, *lastFf = NULL;
00466 struct axtScoreScheme *ss = axtScoreSchemeRnaDefault();
00467 int qGap, tGap;
00468 
00469 if (ff == NULL)
00470     return NULL;
00471 
00472 /* Look for initial gap. */
00473 qGap = ff->nStart - qSeq->dna;
00474 tGap = ff->hStart - tSeq->dna;
00475 bandExtBefore(ss, ff, qGap, tGap, &extraList);
00476 
00477 /* Look for middle gaps. */
00478 for (;;)
00479     {
00480     lastFf = ff;
00481     ff = ff->right;
00482     if (ff == NULL)
00483         break;
00484     qGap = ff->nStart - lastFf->nEnd;
00485     tGap = ff->hStart - lastFf->hEnd;
00486     bandExtAfter(ss, lastFf, qGap, tGap, &extraList);
00487     bandExtBefore(ss, ff, qGap, tGap, &extraList);
00488     }
00489 
00490 /* Look for end gaps. */
00491 qGap = qSeq->dna + qSeq->size - lastFf->nEnd;
00492 tGap = tSeq->dna + tSeq->size - lastFf->hEnd;
00493 bandExtAfter(ss, lastFf, qGap, tGap, &extraList);
00494 
00495 ffList = foldInExtras(qSeq, tSeq, ffList, extraList);
00496 return ffList;
00497 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void bandExtAfter ( struct axtScoreScheme ss,
struct ffAli ff,
int  qGap,
int  tGap,
struct ffAli **  pExtraList 
) [static]

Definition at line 443 of file ffSeedExtend.c.

References bandExtFf(), ffCat(), ffAli::hEnd, maxGap, min, ffAli::nEnd, and ss.

Referenced by bandedExtend(), and findFromSmallerSeeds().

00447 {
00448 struct ffAli *ext;
00449 int minGap = min(qGap, tGap);
00450 int maxGap = minGap * 2;
00451 if (minGap > 0)
00452     {
00453     if (qGap > maxGap) qGap = maxGap;
00454     if (tGap > maxGap) tGap = maxGap;
00455     ext = bandExtFf(NULL, ss, 3, ff, ff->nEnd, ff->nEnd + qGap,
00456             ff->hEnd, ff->hEnd + tGap, 1, maxGap);
00457     ffCat(pExtraList, &ext);
00458     }
00459 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void bandExtBefore ( struct axtScoreScheme ss,
struct ffAli ff,
int  qGap,
int  tGap,
struct ffAli **  pExtraList 
) [static]

Definition at line 425 of file ffSeedExtend.c.

References bandExtFf(), ffCat(), ffAli::hStart, maxGap, min, ffAli::nStart, and ss.

Referenced by bandedExtend(), and findFromSmallerSeeds().

00429 {
00430 struct ffAli *ext;
00431 int minGap = min(qGap, tGap);
00432 int maxGap = minGap * 2;
00433 if (minGap > 0)
00434     {
00435     if (qGap > maxGap) qGap = maxGap;
00436     if (tGap > maxGap) tGap = maxGap;
00437     ext = bandExtFf(NULL, ss, 3, ff, ff->nStart - qGap, ff->nStart, 
00438             ff->hStart - tGap, ff->hStart, -1, maxGap);
00439     ffCat(pExtraList, &ext);
00440     }
00441 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int calcSpliceScore ( struct axtScoreScheme ss,
char  a1,
char  a2,
char  b1,
char  b2,
int  orientation 
) [static]

Definition at line 799 of file ffSeedExtend.c.

References matchScore, and ss.

Referenced by hardRefineSplice().

00802 {
00803 int score = 0;
00804 int matchScore = ss->matrix['c']['c'];
00805 if (orientation >= 0)  /* gt/ag or gc/ag */
00806     {
00807     score += ss->matrix[(int)a1]['g'];
00808     if (a2 != 'c')
00809         score += ss->matrix[(int)a2]['t'];
00810     score += ss->matrix[(int)b1]['a'];
00811     score += ss->matrix[(int)b2]['g'];
00812     }
00813 else                   /* ct/ac or ct/gc */
00814     {
00815     score += ss->matrix[(int)a1]['c'];
00816     score += ss->matrix[(int)a2]['t'];
00817     if (b1 != 'g')
00818         score += ss->matrix[(int)b1]['a'];
00819     score += ss->matrix[(int)b2]['c'];
00820     }
00821 if (score >= 3* matchScore)
00822     score += matchScore;
00823 return score;
00824 }

Here is the caller graph for this function:

static void clumpToExactRange ( struct gfClump clump,
bioSeq qSeq,
int  tileSize,
int  frame,
struct trans3 t3,
struct gfRange **  pRangeList 
) [static]

Definition at line 206 of file ffSeedExtend.c.

References AllocVar, cloneString(), dnaSeq::dna, extendExactLeft(), extendExactRight(), FALSE, gfRange::frame, gfRange::hitCount, gfClump::hitList, internalErr, dnaSeq::name, gfHit::next, gfRange::qEnd, gfRange::qStart, gfHit::qStart, gfSeqSource::seq, dnaSeq::size, slAddHead, gfSeqSource::start, gfRange::t3, gfClump::target, gfRange::tEnd, gfRange::tName, TRUE, gfRange::tSeq, gfRange::tStart, and gfHit::tStart.

Referenced by ffSeedExtInMem().

00210 {
00211 struct gfSeqSource *target = clump->target;
00212 aaSeq *tSeq = target->seq;
00213 BIOPOL *qs, *ts, *qe, *te;
00214 struct gfHit *hit;
00215 int qStart = 0, tStart = 0, qEnd = 0, tEnd = 0, newQ = 0, newT = 0;
00216 boolean outOfIt = TRUE;         /* Logically outside of a clump. */
00217 struct gfRange *range;
00218 BIOPOL *lastQs = NULL, *lastQe = NULL, *lastTs = NULL, *lastTe = NULL;
00219 
00220 if (tSeq == NULL)
00221     internalErr();
00222 
00223 /* The termination condition of this loop is a little complicated.
00224  * We want to output something either when the next hit can't be
00225  * merged into the previous, or at the end of the list.  To avoid
00226  * duplicating the output code we're forced to complicate the loop
00227  * termination logic.  Hence the check for hit == NULL to break
00228  * the loop is not until near the end of the loop. */
00229 for (hit = clump->hitList; ; hit = hit->next)
00230     {
00231     if (hit != NULL)
00232         {
00233         newQ = hit->qStart;
00234         newT = hit->tStart - target->start;
00235         }
00236 
00237     /* See if it's time to output merged (diagonally adjacent) hits. */
00238     if (!outOfIt)       /* Not first time through. */
00239         {
00240         /* As a micro-optimization handle strings of adjacent hits
00241          * specially.  Don't do the extensions until we've merged
00242          * all adjacent hits. */
00243         if (hit == NULL || newQ != qEnd || newT != tEnd)
00244             {
00245             qs = qSeq->dna + qStart;
00246             ts = tSeq->dna + tStart;
00247             qe = qSeq->dna + qEnd;
00248             te = tSeq->dna + tEnd;
00249             extendExactRight(qSeq->size - qEnd, tSeq->size - tEnd,
00250                 &qe, &te);
00251             extendExactLeft(qStart, tStart, &qs, &ts);
00252             if (qs != lastQs || ts != lastTs || qe != lastQe || qs !=  lastQs)
00253                 {
00254                 lastQs = qs;
00255                 lastTs = ts;
00256                 lastQe = qe;
00257                 lastTe = te;
00258                 AllocVar(range);
00259                 range->qStart = qs - qSeq->dna;
00260                 range->qEnd = qe - qSeq->dna;
00261                 range->tName = cloneString(tSeq->name);
00262                 range->tSeq = tSeq;
00263                 range->tStart = ts - tSeq->dna;
00264                 range->tEnd = te - tSeq->dna;
00265                 range->hitCount = qe - qs;
00266                 range->frame = frame;
00267                 range->t3 = t3;
00268                 slAddHead(pRangeList, range);
00269                 }
00270             outOfIt = TRUE;
00271             }
00272         }
00273     if (hit == NULL)
00274         break;
00275 
00276     if (outOfIt)
00277         {
00278         qStart = newQ;
00279         qEnd = qStart + tileSize;
00280         tStart = newT;
00281         tEnd = tStart + tileSize;
00282         outOfIt = FALSE;
00283         }
00284     else
00285         {
00286         qEnd = newQ + tileSize;
00287         tEnd = newT + tileSize;
00288         }
00289     }
00290 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int countA ( char *  s,
int  size 
) [static]

Definition at line 682 of file ffSeedExtend.c.

Referenced by scanTinyOne().

00684 {
00685 int count = 0;
00686 int i;
00687 for (i=size-1; i >= 0; --i)
00688     {
00689     if (s[i] == 'a')
00690         ++count;
00691     else
00692         break;
00693     }
00694 return count;
00695 }

Here is the caller graph for this function:

static int countT ( char *  s,
int  size 
) [static]

Definition at line 670 of file ffSeedExtend.c.

Referenced by scanTinyOne().

00672 {
00673 int i;
00674 for (i=0; i<size; ++i)
00675     {
00676     if (s[i] != 't')
00677         break;
00678     }
00679 return i;
00680 }

Here is the caller graph for this function:

static struct ffAli* expandGapless ( struct dnaSeq qSeq,
struct dnaSeq tSeq,
struct ffAli ffList 
) [static, read]

Definition at line 501 of file ffSeedExtend.c.

References dnaSeq::dna, extendGaplessLeft(), extendGaplessRight(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, ffAli::right, and dnaSeq::size.

Referenced by refineBundle().

00504 {
00505 struct ffAli *ff = ffList, *lastFf = NULL;
00506 char *nStart = qSeq->dna;
00507 char *nEnd = qSeq->dna + qSeq->size;
00508 char *hStart = tSeq->dna;
00509 char *hEnd = tSeq->dna + tSeq->size;
00510 
00511 /* Look for initial gap. */
00512 extendGaplessLeft(ff->nStart - nStart, ff->hStart - hStart, 
00513         9, &ff->nStart, &ff->hStart);
00514 
00515 /* Look for middle gaps. */
00516 for (;;)
00517     {
00518     lastFf = ff;
00519     ff = ff->right;
00520     if (ff == NULL)
00521         break;
00522     extendGaplessRight(nEnd - lastFf->nEnd, hEnd - lastFf->hEnd, 9, 
00523         &lastFf->nEnd, &lastFf->hEnd);
00524     extendGaplessLeft(ff->nStart - nStart, ff->hStart - hStart, 9, 
00525         &ff->nStart, &ff->hStart);
00526     }
00527 extendGaplessRight(nEnd - lastFf->nEnd,
00528         hEnd - lastFf->hEnd, 9,
00529         &lastFf->nEnd, &lastFf->hEnd);
00530 return ffList;
00531 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void extendExactLeft ( int  qMax,
int  tMax,
char **  pStartQ,
char **  pStartT 
) [static]

Definition at line 35 of file ffSeedExtend.c.

References min.

Referenced by clumpToExactRange(), and ffFindExtendNmers().

00037 {
00038 int last = min(qMax, tMax);
00039 int i;
00040 char *q = *pStartQ - 1, *t = *pStartT - 1;
00041 
00042 for (i=0; i<last; ++i)
00043     {
00044     if (*q != *t)
00045         break;
00046     q -= 1;
00047     t -= 1;
00048     }
00049 *pStartQ = q + 1;
00050 *pStartT = t + 1;
00051 }

Here is the caller graph for this function:

static void extendExactRight ( int  qMax,
int  tMax,
char **  pEndQ,
char **  pEndT 
) [static]

Definition at line 17 of file ffSeedExtend.c.

References min.

Referenced by clumpToExactRange(), and ffFindExtendNmers().

00019 {
00020 int last = min(qMax, tMax);
00021 int i;
00022 char *q = *pEndQ, *t = *pEndT;
00023 
00024 for (i=0; i<last; ++i)
00025     {
00026     if (*q != *t)
00027         break;
00028     q += 1;
00029     t += 1;
00030     }
00031 *pEndQ = q;
00032 *pEndT = t;
00033 }

Here is the caller graph for this function:

static void extendGaplessLeft ( int  qMax,
int  tMax,
int  maxDrop,
char **  pStartQ,
char **  pStartT 
) [static]

Definition at line 85 of file ffSeedExtend.c.

References min.

Referenced by expandGapless().

00088 {
00089 int score = 0, bestScore = -1, bestPos = 0;
00090 int last = -min(qMax, tMax);
00091 int i;
00092 char *q = *pStartQ, *t = *pStartT;
00093 
00094 for (i=-1; i>=last; --i)
00095     {
00096     if (q[i] == t[i])
00097         {
00098         ++score;
00099         if (score > bestScore)
00100             {
00101             bestScore = score;
00102             bestPos = i;
00103             }
00104         }
00105     else
00106         {
00107         score -= 3;
00108         if (bestScore - score >= maxDrop)
00109             break;
00110         }
00111     }
00112 *pStartQ = q+bestPos;
00113 *pStartT = t+bestPos;
00114 }

Here is the caller graph for this function:

static void extendGaplessRight ( int  qMax,
int  tMax,
int  maxDrop,
char **  pEndQ,
char **  pEndT 
) [static]

Definition at line 53 of file ffSeedExtend.c.

References min.

Referenced by expandGapless().

00056 {
00057 int last = min(qMax, tMax);
00058 int i;
00059 char *q = *pEndQ, *t = *pEndT;
00060 int score = 0, bestScore = -1, bestPos = -1;
00061 
00062 for (i=0; i<last; ++i)
00063     {
00064     if (q[i] == t[i])
00065         {
00066         ++score;
00067         if (score > bestScore)
00068             {
00069             bestScore = score;
00070             bestPos = i;
00071             }
00072         }
00073     else
00074         {
00075         score -= 3;
00076         if (bestScore - score >= maxDrop)
00077             break;
00078         }
00079     }
00080 ++bestPos;
00081 *pEndQ = q+bestPos;
00082 *pEndT = t+bestPos;
00083 }

Here is the caller graph for this function:

static struct ffAli* ffFindExtendNmers ( char *  nStart,
char *  nEnd,
char *  hStart,
char *  hEnd,
int  seedSize 
) [static, read]

Definition at line 155 of file ffSeedExtend.c.

References AllocVar, extendExactLeft(), extendExactRight(), ffHashFuncN(), lm, lmAllocArray, lmAllocVar, lmInit(), seqHashEl::next, seqHashEl::seq, slAddHead, and totalDegenerateN().

Referenced by findFromSmallerSeeds().

00158 {
00159 struct lm *lm = lmInit(32*1024);
00160 struct seqHashEl **hashTable, *hashEl, **hashSlot;
00161 struct ffAli *ffList = NULL, *ff;
00162 char *n = nStart, *h = hStart, *ne = nEnd - seedSize, *he = hEnd - seedSize;
00163 
00164 /* Hash the needle. */
00165 lmAllocArray(lm, hashTable, 4*1024);
00166 while (n <= ne)
00167     {
00168     if (!totalDegenerateN(n, seedSize))
00169         {
00170         hashSlot = ffHashFuncN(n, seedSize) + hashTable;
00171         lmAllocVar(lm, hashEl);
00172         hashEl->seq = n;
00173         slAddHead(hashSlot, hashEl);
00174         }
00175     ++n;
00176     }
00177 
00178 /* Scan the haystack adding hits. */
00179 while (h <= he)
00180     {
00181     for (hashEl = hashTable[ffHashFuncN(h, seedSize)]; 
00182         hashEl != NULL; hashEl = hashEl->next)
00183         {
00184         if (memcmp(hashEl->seq, h, seedSize) == 0)
00185             {
00186             AllocVar(ff);
00187             ff->hStart = h;
00188             ff->hEnd = h + seedSize;
00189             ff->nStart = hashEl->seq;
00190             ff->nEnd = hashEl->seq + seedSize;
00191             extendExactLeft(ff->nStart - nStart, ff->hStart - hStart, 
00192                 &ff->nStart, &ff->hStart);
00193             extendExactRight(nEnd - ff->nEnd, hEnd - ff->hEnd, &ff->nEnd, &ff->hEnd);
00194             ff->left = ffList;
00195             ffList = ff;
00196             }
00197         }
00198     ++h;
00199     }
00200 ffList = ffMakeRightLinks(ffList);
00201 ffList = ffMergeClose(ffList, nStart, hStart);
00202 lmCleanup(&lm);
00203 return ffList;
00204 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int ffHashFuncN ( char *  s,
int  seedSize 
) [static]

Definition at line 116 of file ffSeedExtend.c.

References ntVal.

Referenced by ffFindExtendNmers().

00118 {
00119 int acc = 0;
00120 int i;
00121 for (i=0; i<seedSize; ++i)
00122     {
00123     acc <<= 1;
00124     acc += ntVal[(int)s[i]];
00125     }
00126 return acc&0xfff;
00127 }

Here is the caller graph for this function:

struct ssBundle* ffSeedExtInMem ( struct genoFind gf,
struct dnaSeq qSeq,
Bits qMaskBits,
int  qOffset,
struct lm lm,
int  minScore,
boolean  isRc 
) [read]

Definition at line 1251 of file ffSeedExtend.c.

References AllocVar, clumpToExactRange(), FALSE, ffCdna, ffIntronMax, gfClumpFreeList(), gfFindClumpsWithQmask(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), lm, gfClump::next, rangeList, refineBundle(), slAddHead, slSort(), ssStitch(), genoFind::tileSize, and TRUE.

01254 {
01255 struct ssBundle *bunList = NULL, *bun;
01256 int hitCount;
01257 struct gfClump *clumpList, *clump;
01258 struct gfRange *rangeList = NULL, *range;
01259 struct dnaSeq *tSeq;
01260 
01261 clumpList = gfFindClumpsWithQmask(gf, qSeq, qMaskBits, qOffset, lm, &hitCount);
01262 for (clump = clumpList; clump != NULL; clump = clump->next)
01263     clumpToExactRange(clump, qSeq, gf->tileSize, 0, NULL, &rangeList);
01264 slSort(&rangeList, gfRangeCmpTarget);
01265 rangeList = gfRangesBundle(rangeList, ffIntronMax);
01266 for (range = rangeList; range != NULL; range = range->next)
01267     {
01268     range->qStart += qOffset;
01269     range->qEnd += qOffset;
01270     tSeq = range->tSeq;
01271     AllocVar(bun);
01272     bun->qSeq = qSeq;
01273     bun->genoSeq = tSeq;
01274     bun->ffList = gfRangesToFfItem(range->components, qSeq);
01275     bun->isProt = FALSE;
01276     bun->avoidFuzzyFindKludge = TRUE;
01277     ssStitch(bun, ffCdna, 16, 10);
01278     refineBundle(gf, qSeq, qMaskBits, qOffset, tSeq, lm, bun, isRc);
01279     slAddHead(&bunList, bun);
01280     }
01281 gfRangeFreeList(&rangeList);
01282 gfClumpFreeList(&clumpList);
01283 return bunList;
01284 }

Here is the call graph for this function:

static struct ffAli* fillInExact ( char *  nStart,
char *  nEnd,
char *  hStart,
char *  hEnd,
boolean  isRc,
boolean  scanLeft,
boolean  scanRight,
int  resolveLimit 
) [static, read]

Definition at line 591 of file ffSeedExtend.c.

References AllocVar, min, scanExactLeft(), and scanExactRight().

Referenced by scanForTinyInternal(), and scanTinyOne().

00597 {
00598 struct ffAli *ff = NULL;
00599 char *hPos = NULL;
00600 int nGap = nEnd - nStart;
00601 int hGap = hEnd - hStart;
00602 int minGap = min(nGap, hGap);
00603 
00604 if (minGap <= 2)
00605     return NULL;
00606 if (scanLeft)
00607     {
00608     if ((hPos = scanExactLeft(nStart, nGap, hGap, hEnd, resolveLimit)) != NULL)
00609         {
00610         AllocVar(ff);
00611         ff->nStart = nStart;
00612         ff->nEnd = nEnd;
00613         ff->hStart = hPos;
00614         ff->hEnd = hPos + nGap;
00615         return ff;
00616         }
00617     }
00618 else
00619     {
00620     if ((hPos = scanExactRight(nStart, nGap, hGap, hStart, resolveLimit)) != NULL)
00621         {
00622         AllocVar(ff);
00623         ff->nStart = nStart;
00624         ff->nEnd = nEnd;
00625         ff->hStart = hPos;
00626         ff->hEnd = hPos + nGap;
00627         return ff;
00628         }
00629     }
00630 return NULL;
00631 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* findFromSmallerSeeds ( char *  nStart,
char *  nEnd,
char *  hStart,
char *  hEnd,
boolean  isRc,
boolean  scanLeft,
boolean  scanRight,
int  seedSize,
int  resolveLimit 
) [static, read]

Definition at line 633 of file ffSeedExtend.c.

References axtScoreSchemeRnaDefault(), bandExtAfter(), bandExtBefore(), ffCat(), ffFindExtendNmers(), ffAli::right, seedResolvePower(), and ss.

Referenced by scanTinyOne().

00637 {
00638 int nGap = nEnd - nStart;
00639 if (nGap >= seedSize)
00640     {
00641     struct ffAli *ffList;
00642     if (scanLeft || scanRight)
00643         {
00644         int hGap = hEnd - hStart;
00645         int maxSize = seedResolvePower(seedSize, resolveLimit);
00646         if (hGap > maxSize) hGap = maxSize;
00647         if (scanLeft)
00648             hStart = hEnd - hGap;
00649         if (scanRight)
00650             hEnd = hStart + hGap;
00651         }
00652     ffList = ffFindExtendNmers(nStart, nEnd, hStart, hEnd, seedSize);
00653     if (ffList != NULL)
00654         {
00655         struct ffAli *extensions = NULL, *ff;
00656         struct axtScoreScheme *ss = axtScoreSchemeRnaDefault();
00657         for (ff = ffList; ff != NULL; ff = ff->right)
00658             {
00659             bandExtBefore(ss, ff, ff->nStart - nStart, ff->hStart - hStart, &extensions);
00660             bandExtAfter(ss, ff, nEnd - ff->nEnd, hEnd - ff->hEnd, &extensions);
00661             }
00662         ffCat(&ffList, &extensions);
00663         }
00664 
00665     return ffList;
00666     }
00667 return NULL;
00668 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* foldInExtras ( struct dnaSeq qSeq,
struct dnaSeq tSeq,
struct ffAli ffList,
struct ffAli extraList 
) [static, read]

Definition at line 314 of file ffSeedExtend.c.

References AllocVar, ssBundle::avoidFuzzyFindKludge, ssFfItem::ff, ffCdna, ssBundle::ffList, ssBundle::genoSeq, ssBundle::qSeq, slAddHead, ssBundleFree(), ssStitch(), and TRUE.

Referenced by bandedExtend(), scanForSmallerExons(), scanForTinyInternal(), and scanIndexForSmallExons().

00318 {
00319 if (extraList != NULL)
00320     {
00321     struct ssBundle *bun;
00322     struct ssFfItem *ffi;
00323     AllocVar(bun);
00324     bun->qSeq = qSeq;
00325     bun->genoSeq = tSeq;
00326     bun->avoidFuzzyFindKludge = TRUE;
00327     AllocVar(ffi);
00328     ffi->ff = ffList;
00329     slAddHead(&bun->ffList, ffi);
00330     AllocVar(ffi);
00331     ffi->ff = extraList;
00332     slAddHead(&bun->ffList, ffi);
00333     ssStitch(bun, ffCdna, 16, 1);
00334     if (bun->ffList != NULL)
00335         {
00336         ffList = bun->ffList->ff;
00337         bun->ffList->ff = NULL;
00338         }
00339     else
00340         {
00341         ffList = NULL;
00342         }
00343     ssBundleFree(&bun);
00344     }
00345 return ffList;
00346 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void grabAroundIntron ( char *  hpStart,
int  iPos,
int  iSize,
int  modPeelSize,
char *  hSeq 
) [static]

Definition at line 826 of file ffSeedExtend.c.

Referenced by hardRefineSplice().

00829 {
00830 memcpy(hSeq, hpStart, iPos);
00831 memcpy(hSeq+iPos, hpStart+iPos+iSize, modPeelSize - iPos);
00832 hSeq[modPeelSize] = 0;
00833 }

Here is the caller graph for this function:

static struct ffAli* hardRefineSplice ( struct ffAli left,
struct ffAli right,
struct dnaSeq qSeq,
struct dnaSeq tSeq,
struct ffAli ffList,
int  orientation 
) [static, read]

Definition at line 857 of file ffSeedExtend.c.

References AllocVar, ArraySize, axtScoreSchemeRnaDefault(), axtScoreSym(), bandExt(), calcSpliceScore(), grabAroundIntron(), ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, ffAli::nStart, ffAli::right, ss, TRUE, and verbose().

Referenced by refineSpliceSites().

00862 {
00863 /* Strategy: peel back about 6 bases on either side of intron.
00864  * Then try positioning the intron at each position in the
00865  * peeled area and assessing score. */
00866 int peelSize = 12;
00867 char nSeq[12+1], hSeq[12+1+1];
00868 char nSym[25], hSym[25];
00869 int symCount;
00870 int seqScore, spliceScore, score, maxScore = 0;
00871 int nGap = right->nStart - left->nEnd;
00872 int hGap = right->hStart - left->hEnd;
00873 int peelLeft = (peelSize - nGap)/2;
00874 int intronSize = hGap - nGap;
00875 char *npStart = left->nEnd - peelLeft;
00876 char *npEnd = npStart + peelSize;
00877 char *hpStart = left->hEnd - peelLeft;
00878 char *hpEnd = npEnd + (right->hStart - right->nStart);
00879 struct axtScoreScheme *ss = axtScoreSchemeRnaDefault();
00880 static int modSize[3] = {0, 1, -1};
00881 int modIx;
00882 int bestPos = -1, bestMod = 0;
00883 int iPos;
00884 
00885 memcpy(nSeq, npStart, peelSize);
00886 nSeq[peelSize] = 0;
00887 for (modIx=0; modIx < ArraySize(modSize); ++modIx)
00888     {
00889     int modOne = modSize[modIx];
00890     int modPeelSize = peelSize - modOne;
00891     int iSize = intronSize + modOne;
00892     for (iPos=0; iPos <= modPeelSize; iPos++)
00893         {
00894         grabAroundIntron(hpStart, iPos, iSize, modPeelSize, hSeq);
00895         if (bandExt(TRUE, ss, 2, nSeq, peelSize, hSeq, modPeelSize, 1,
00896                 sizeof(hSym), &symCount, nSym, hSym, NULL, NULL))
00897             {
00898             seqScore = axtScoreSym(ss, symCount, nSym, hSym);
00899             spliceScore = calcSpliceScore(ss, hpStart[iPos], hpStart[iPos+1],
00900                     hpStart[iPos+iSize-2], hpStart[iPos+iSize-1], orientation);
00901             score = seqScore + spliceScore;
00902             if (score > maxScore)
00903                 {
00904                 maxScore = score;
00905                 bestPos = iPos;
00906                 bestMod = modOne;
00907                 }
00908             }
00909         }
00910     }
00911 if (maxScore > 0)
00912     {
00913     int modPeelSize = peelSize - bestMod;
00914     int i,diff, cutSymIx = 0;
00915     int nIx, hIx;
00916     struct ffAli *ff;
00917 
00918     /* Regenerate the best alignment. */
00919     grabAroundIntron(hpStart, bestPos, intronSize + bestMod, modPeelSize, hSeq); 
00920     bandExt(TRUE, ss, 2, nSeq, peelSize, hSeq, modPeelSize, 1,
00921             sizeof(hSym), &symCount, nSym, hSym, NULL, NULL);
00922 
00923     /* Peel back surrounding ffAli's */
00924     if (left->nStart > npStart || right->nEnd < npEnd)
00925         {
00926         /* It would take a lot of code to handle this case. 
00927          * I believe it is rare enough that it's not worth
00928          * it.  This verbosity will help keep track of how
00929          * often it comes up. */
00930         verbose(2, "Unable to peel in hardRefineSplice\n");
00931         return ffList;
00932         }
00933     diff = left->nEnd - npStart;
00934     left->nEnd -= diff;
00935     left->hEnd -= diff;
00936     diff = right->nStart - npEnd;
00937     right->nStart -= diff;
00938     right->hStart -= diff;
00939 
00940     /* Step through base by base alignment from the left until
00941      * hit intron, converting it into ffAli format. */
00942     nIx = hIx = 0;
00943     ff = left;
00944     for (i=0; i<symCount; ++i)
00945         {
00946         if (hIx == bestPos)
00947             {
00948             cutSymIx = i;
00949             break;
00950             }
00951         if (hSym[i] == '-')
00952             {
00953             ff = NULL;
00954             nIx += 1;
00955             }
00956         else if (nSym[i] == '-')
00957             {
00958             ff = NULL;
00959             hIx += 1;
00960             }
00961         else
00962             {
00963             if (ff == NULL)
00964                 {
00965                 AllocVar(ff);
00966                 ff->left = left;
00967                 ff->right = right;
00968                 left->right = ff;
00969                 right->left = ff;
00970                 left = ff;
00971                 ff->nStart = ff->nEnd = npStart + nIx;
00972                 ff->hStart = ff->hEnd = hpStart + hIx;
00973                 }
00974             ++nIx;
00975             ++hIx;
00976             ff->nEnd += 1;
00977             ff->hEnd += 1;
00978             }
00979         }
00980 
00981     /* Step through base by base alignment from the right until
00982      * hit intron, converting it into ffAli format. */
00983     ff = right;
00984     hIx = nIx = 0;      /* Index from right side. */
00985     for (i = symCount-1; i >= cutSymIx; --i)
00986         {
00987         if (hSym[i] == '-')
00988             {
00989             ff = NULL;
00990             nIx += 1;
00991             }
00992         else if (nSym[i] == '-')
00993             {
00994             ff = NULL;
00995             hIx += 1;
00996             }
00997         else
00998             {
00999             if (ff == NULL)
01000                 {
01001                 AllocVar(ff);
01002                 ff->left = left;
01003                 ff->right = right;
01004                 left->right = ff;
01005                 right->left = ff;
01006                 left = ff;
01007                 ff->nStart = ff->nEnd = npEnd - nIx;
01008                 ff->hStart = ff->hEnd = hpEnd - hIx;
01009                 }
01010             ++nIx;
01011             ++hIx;
01012             ff->nStart -= 1;
01013             ff->hStart -= 1;
01014             }
01015         }
01016     }
01017 return ffList;
01018 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void refineBundle ( struct genoFind gf,
struct dnaSeq qSeq,
Bits qMaskBits,
int  qMaskOffset,
struct dnaSeq tSeq,
struct lm lm,
struct ssBundle bun,
boolean  isRc 
) [static]

Definition at line 1222 of file ffSeedExtend.c.

References bandedExtend(), expandGapless(), ssFfItem::ff, ffCdna, ssBundle::ffList, gfFindNamedSource(), lm, dnaSeq::name, ssFfItem::next, refineSpliceSites(), scanForSmallerExons(), scanForTinyInternal(), scanIndexForSmallExons(), smoothSmallGaps(), ssStitch(), genoFind::tileSize, and trimFlakyEnds().

Referenced by ffSeedExtInMem().

01226 {
01227 struct ssFfItem *ffi;
01228 struct gfSeqSource *target = gfFindNamedSource(gf, tSeq->name);
01229 
01230 /* First do gapless expansions and restitch. */
01231 for (ffi = bun->ffList; ffi != NULL; ffi = ffi->next)
01232     {
01233     ffi->ff = expandGapless(qSeq, tSeq, ffi->ff);
01234     }
01235 ssStitch(bun, ffCdna, 16, 16);
01236 
01237 for (ffi = bun->ffList; ffi != NULL; ffi = ffi->next)
01238     {
01239     ffi->ff = scanIndexForSmallExons(gf, target, qSeq, qMaskBits, qMaskOffset, 
01240         tSeq, lm, ffi->ff);
01241     ffi->ff = bandedExtend(qSeq, tSeq, ffi->ff);
01242     ffi->ff = scanForSmallerExons(gf->tileSize, qSeq, tSeq, isRc, ffi->ff);
01243     ffi->ff = refineSpliceSites(qSeq, tSeq, ffi->ff);
01244     ffi->ff = scanForTinyInternal(qSeq, tSeq, isRc, ffi->ff);
01245     ffi->ff = smoothSmallGaps(qSeq, tSeq, ffi->ff);
01246     ffi->ff = trimFlakyEnds(qSeq, tSeq, ffi->ff);
01247     }
01248 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* refineSpliceSites ( struct dnaSeq qSeq,
struct dnaSeq tSeq,
struct ffAli ffList 
) [static, read]

Definition at line 1021 of file ffSeedExtend.c.

References ffIntronOrientation(), ffRemoveEmptyAlis(), ffSlideOrientedIntrons(), hardRefineSplice(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, ffAli::right, tradeMismatchToCloseSpliceGap(), and TRUE.

Referenced by refineBundle().

01025 {
01026 int orientation = ffIntronOrientation(ffList);
01027 struct ffAli *ff, *nextFf;
01028 if (orientation == 0)
01029     return ffList;
01030 if (ffSlideOrientedIntrons(ffList, orientation))
01031     ffList = ffRemoveEmptyAlis(ffList, TRUE);
01032 for (ff = ffList; ff != NULL; ff = nextFf)
01033     {
01034     int nGap, hGap;
01035     if ((nextFf = ff->right) == NULL)
01036         break;
01037     nGap = nextFf->nStart - ff->nEnd;
01038     hGap = nextFf->hStart - ff->hEnd;
01039     if (nGap > 0 && nGap <= 6 && hGap >= 30)
01040         {
01041         if (nGap == 1)
01042             {
01043             if (tradeMismatchToCloseSpliceGap(ff, nextFf, orientation))
01044                 continue;
01045             }
01046         ffList = hardRefineSplice(ff, nextFf, qSeq, tSeq, ffList, orientation);
01047         }
01048     }
01049 return ffList;
01050 }

Here is the call graph for this function:

Here is the caller graph for this function:

static char* scanExactLeft ( char *  n,
int  nSize,
int  hSize,
char *  hEnd,
int  resolveLimit 
) [static]

Definition at line 547 of file ffSeedExtend.c.

References ffAli::hStart, and seedResolvePower().

Referenced by fillInExact().

00549 {
00550 /* Optimize a little by comparing the first character inline. */
00551 char n1 = *n++;
00552 char *hStart;
00553 int maxSize = seedResolvePower(nSize, resolveLimit);
00554 
00555 if (hSize > maxSize) hSize = maxSize;
00556 nSize -= 1;
00557 hStart = hEnd - hSize;
00558 
00559 hEnd -= nSize;
00560 while (hEnd >= hStart)
00561     {
00562     if (n1 == *hEnd && memcmp(n, hEnd+1, nSize) == 0)
00563         return hEnd;
00564     hEnd -= 1;
00565     }
00566 return NULL;
00567 }

Here is the call graph for this function:

Here is the caller graph for this function:

static char* scanExactRight ( char *  n,
int  nSize,
int  hSize,
char *  hStart,
int  resolveLimit 
) [static]

Definition at line 569 of file ffSeedExtend.c.

References ffAli::hEnd, and seedResolvePower().

Referenced by fillInExact().

00571 {
00572 /* Optimize a little by comparing the first character inline. */
00573 char n1 = *n++;
00574 char *hEnd;
00575 int maxSize = seedResolvePower(nSize, resolveLimit);
00576 
00577 if (hSize > maxSize) hSize = maxSize;
00578 hEnd = hStart + hSize;
00579 nSize -= 1;
00580 
00581 hEnd -= nSize;
00582 while (hStart <= hEnd)
00583     {
00584     if (n1 == *hStart && memcmp(n, hStart+1, nSize) == 0)
00585         return hStart;
00586     hStart += 1;
00587     }
00588 return NULL;
00589 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* scanForSmallerExons ( int  seedSize,
struct dnaSeq qSeq,
struct dnaSeq tSeq,
boolean  isRc,
struct ffAli ffList 
) [static, read]

Definition at line 718 of file ffSeedExtend.c.

References dnaSeq::dna, FALSE, ffCat(), foldInExtras(), scanTinyOne(), dnaSeq::size, and TRUE.

Referenced by refineBundle().

00722 {
00723 struct ffAli *extraList = NULL, *ff = ffList, *lastFf = NULL, *newFf;
00724 
00725 if (ff == NULL)
00726     return NULL;
00727 
00728 /* Look for initial gap. */
00729 newFf = scanTinyOne(qSeq->dna, ff->nStart, tSeq->dna, ff->hStart, 
00730         isRc, TRUE, FALSE, seedSize); 
00731 ffCat(&extraList, &newFf);
00732 
00733 /* Look for middle gaps. */
00734 for (;;)
00735     {
00736     lastFf = ff;
00737     ff = ff->right;
00738     if (ff == NULL)
00739         break;
00740     newFf = scanTinyOne(lastFf->nEnd, ff->nStart, lastFf->hEnd, ff->hStart, 
00741         isRc, FALSE, FALSE, seedSize);
00742     ffCat(&extraList, &newFf);
00743     }
00744 
00745 /* Look for end gaps. */
00746 newFf = scanTinyOne(lastFf->nEnd, qSeq->dna + qSeq->size, 
00747         lastFf->hEnd, tSeq->dna + tSeq->size, isRc, FALSE, TRUE, seedSize);
00748 ffCat(&extraList, &newFf);
00749 
00750 ffList = foldInExtras(qSeq, tSeq, ffList, extraList);
00751 return ffList;
00752 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* scanForTinyInternal ( struct dnaSeq qSeq,
struct dnaSeq tSeq,
boolean  isRc,
struct ffAli ffList 
) [static, read]

Definition at line 754 of file ffSeedExtend.c.

References FALSE, ffCat(), fillInExact(), foldInExtras(), and ffAli::right.

Referenced by refineBundle().

00757 {
00758 struct ffAli *extraList = NULL, *ff = ffList, *lastFf = NULL, *newFf;
00759 
00760 if (ff == NULL)
00761     return NULL;
00762 
00763 /* Look for middle gaps. */
00764 for (;;)
00765     {
00766     lastFf = ff;
00767     ff = ff->right;
00768     if (ff == NULL)
00769         break;
00770     newFf = fillInExact(lastFf->nEnd, ff->nStart, lastFf->hEnd, ff->hStart, isRc, 
00771         FALSE, FALSE, 0);
00772     ffCat(&extraList, &newFf);
00773     }
00774 
00775 ffList = foldInExtras(qSeq, tSeq, ffList, extraList);
00776 return ffList;
00777 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* scanIndexForSmallExons ( struct genoFind gf,
struct gfSeqSource target,
struct dnaSeq qSeq,
Bits qMaskBits,
int  qMaskOffset,
struct dnaSeq tSeq,
struct lm lm,
struct ffAli ffList 
) [static, read]

Definition at line 348 of file ffSeedExtend.c.

References addExtraHits(), dnaSeq::dna, ffIntronMax, ffMakeRightLinks(), foldInExtras(), gfFindHitsInRegion(), ffAli::hEnd, lm, ffAli::nEnd, ffAli::nStart, ffAli::right, dnaSeq::size, genoFind::tileSize, tileSize, and ZeroVar.

Referenced by refineBundle().

00352 {
00353 int qGap, tGap, tStart, qStart;
00354 struct ffAli *lastFf = NULL, *ff = ffList;
00355 struct gfHit *hitList = NULL;
00356 struct dnaSeq qSubSeq;
00357 struct ffAli *extraList = NULL;
00358 int tileSize = gf->tileSize;
00359 int biggestToFind = 200;        /* Longer should be found at an earlier stage */
00360 
00361 /* Handle problematic empty case immediately. */
00362 if (ffList == NULL)
00363     return NULL;
00364 
00365 ZeroVar(&qSubSeq);
00366 
00367 /* Look for initial gap. */
00368 qGap = ff->nStart - qSeq->dna;
00369 tGap = ff->hStart - tSeq->dna;
00370 if (qGap >= tileSize && qGap <= biggestToFind && tGap >= tileSize)
00371     {
00372     tStart = ff->hStart - tSeq->dna;
00373     if (tGap > ffIntronMax) 
00374         {
00375         tGap = ffIntronMax;
00376         }
00377     qSubSeq.dna = qSeq->dna;
00378     qSubSeq.size = qGap;
00379     hitList = gfFindHitsInRegion(gf, &qSubSeq, qMaskBits, qMaskOffset,
00380         lm, target, tStart - tGap, tStart);
00381     addExtraHits(hitList, tileSize, &qSubSeq, tSeq, &extraList);
00382     }
00383 
00384 /* Look for middle gaps. */
00385 for (;;)
00386     {
00387     lastFf = ff;
00388     ff = ff->right;
00389     if (ff == NULL)
00390         break;
00391     qGap = ff->nStart - lastFf->nEnd;
00392     tGap = ff->hStart - lastFf->hEnd;
00393     if (qGap >= tileSize && qGap <= biggestToFind && tGap >= tileSize)
00394          {
00395          qStart = lastFf->nEnd - qSeq->dna;
00396          tStart = lastFf->hEnd - tSeq->dna;
00397          qSubSeq.dna = lastFf->nEnd;
00398          qSubSeq.size = qGap;
00399          hitList = gfFindHitsInRegion(gf, &qSubSeq, qMaskBits, qMaskOffset + qStart, 
00400                 lm, target, tStart, tStart + tGap);
00401          addExtraHits(hitList, tileSize, &qSubSeq, tSeq, &extraList);
00402          }
00403     }
00404 
00405 /* Look for end gaps. */
00406 qGap = qSeq->dna + qSeq->size - lastFf->nEnd;
00407 tGap = tSeq->dna + tSeq->size - lastFf->hEnd;
00408 if (qGap >= tileSize && qGap < biggestToFind && tGap >= tileSize)
00409     {
00410     if (tGap > ffIntronMax) tGap = ffIntronMax;
00411     qStart = lastFf->nEnd - qSeq->dna;
00412     tStart = lastFf->hEnd - tSeq->dna;
00413     qSubSeq.dna = lastFf->nEnd;
00414     qSubSeq.size = qGap;
00415     hitList = gfFindHitsInRegion(gf, &qSubSeq, qMaskBits, qMaskOffset + qStart, 
00416                 lm, target, tStart, tStart + tGap);
00417     addExtraHits(hitList, tileSize, &qSubSeq, tSeq, &extraList);
00418     }
00419 extraList = ffMakeRightLinks(extraList);
00420 ffList = foldInExtras(qSeq, tSeq, ffList, extraList);
00421 return ffList;
00422 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct ffAli* scanTinyOne ( char *  nStart,
char *  nEnd,
char *  hStart,
char *  hEnd,
boolean  isRc,
boolean  scanLeft,
boolean  scanRight,
int  seedSize 
) [read]

Definition at line 697 of file ffSeedExtend.c.

References countA(), countT(), fillInExact(), and findFromSmallerSeeds().

Referenced by scanForSmallerExons().

00700 {
00701 struct ffAli *ff;
00702 int nGap = nEnd - nStart;
00703 if (nGap > 80)          /* The index should have found things this big already. */
00704     return NULL;
00705 if (scanLeft && isRc)
00706     nStart += countT(nStart, nGap);
00707 if (scanRight && !isRc)
00708     nEnd -= countA(nStart, nGap);
00709 ff = fillInExact(nStart, nEnd, hStart, hEnd, isRc, scanLeft, scanRight, 3);
00710 if (ff != NULL)
00711     {
00712     return ff;
00713     }
00714 return findFromSmallerSeeds(nStart, nEnd, hStart, hEnd, isRc,
00715         scanLeft, scanRight, seedSize, 3);
00716 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int seedResolvePower ( int  seedSize,
int  resolveLimit 
) [static]

Definition at line 534 of file ffSeedExtend.c.

References ffIntronMax.

Referenced by findFromSmallerSeeds(), scanExactLeft(), and scanExactRight().

00537 {
00538 int res;
00539 if (seedSize >= 14)     /* Avoid int overflow */
00540     return ffIntronMax;
00541 res  = (1 << (seedSize+seedSize-resolveLimit));
00542 if (res > ffIntronMax)
00543     res = ffIntronMax;
00544 return res;
00545 }

Here is the caller graph for this function:

static boolean smoothOneGap ( struct ffAli left,
struct ffAli right,
struct ffAli ffList 
) [static]

Definition at line 1053 of file ffSeedExtend.c.

References axtScoreSchemeRnaDefault(), axtScoreSym(), bandExt(), FALSE, ffAliFromSym(), ffCalcCdnaGapPenalty(), ffRightmost(), ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, ffAli::nStart, ffAli::right, ss, and TRUE.

Referenced by smoothSmallGaps().

01058 {
01059 int nGap = right->nStart - left->nEnd;
01060 int hGap = right->hStart - left->hEnd;
01061 if (nGap > 0 && hGap > 0 && nGap < 10 && hGap < 10)
01062     {
01063     int sizeDiff = nGap - hGap;
01064     if (sizeDiff < 0) sizeDiff = -sizeDiff;
01065     if (sizeDiff <= 3)
01066         {
01067         struct axtScoreScheme *ss = axtScoreSchemeRnaDefault();
01068         char hSym[20], nSym[20];
01069         int symCount;
01070         if (bandExt(TRUE, ss, 3, left->nEnd, nGap, left->hEnd, hGap, 1,
01071                 sizeof(hSym), &symCount, nSym, hSym, NULL, NULL))
01072             {
01073             int gapPenalty = -ffCalcCdnaGapPenalty(hGap, nGap) * ss->matrix['a']['a'];
01074             int score = axtScoreSym(ss, symCount, nSym, hSym);
01075             if (score >= gapPenalty)
01076                 {
01077                 struct ffAli *l, *r;
01078                 l = ffAliFromSym(symCount, nSym, hSym, NULL, left->nEnd, left->hEnd);
01079                 r = ffRightmost(l);
01080                 left->right = l;
01081                 l->left = left;
01082                 r->right = right;
01083                 right->left = r;
01084                 return TRUE;
01085                 }
01086             }
01087         }
01088     }
01089 return FALSE;
01090 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* smoothSmallGaps ( struct dnaSeq qSeq,
struct dnaSeq tSeq,
struct ffAli ffList 
) [static, read]

Definition at line 1093 of file ffSeedExtend.c.

References FALSE, ffMergeNeedleAlis(), ffAli::left, ffAli::right, smoothOneGap(), and TRUE.

Referenced by refineBundle().

01096 {
01097 struct ffAli *left = ffList, *right;
01098 boolean smoothed = FALSE;
01099 
01100 if (ffList == NULL) return NULL;
01101 for (;;)
01102     {
01103     if ((right = left->right) == NULL)
01104         break;
01105     if (smoothOneGap(left, right, ffList))
01106         smoothed = TRUE;
01107     left = right;
01108     }
01109 if (smoothed)
01110     {
01111     ffList = ffMergeNeedleAlis(ffList, TRUE);
01112     }
01113 return ffList;
01114 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean totalDegenerateN ( char *  s,
int  seedSize 
) [static]

Definition at line 136 of file ffSeedExtend.c.

References FALSE, and TRUE.

Referenced by ffFindExtendNmers().

00138 {
00139 char c1 = s[0], c2 = s[1];
00140 int i;
00141 if (seedSize & 1)
00142     {
00143     seedSize -= 1;
00144     if (c1 != s[seedSize])
00145         return FALSE;
00146     }
00147 for (i=2; i<seedSize; i += 2)
00148     {
00149     if (c1 != s[i] || c2 != s[i+1])
00150         return FALSE;
00151     }
00152 return TRUE;
00153 }

Here is the caller graph for this function:

static boolean tradeMismatchToCloseSpliceGap ( struct ffAli left,
struct ffAli right,
int  orientation 
) [static]

Definition at line 779 of file ffSeedExtend.c.

References FALSE, ffAli::hEnd, ffAli::hStart, intronOrientation(), ffAli::left, ffAli::nEnd, ffAli::nStart, ffAli::right, and TRUE.

Referenced by refineSpliceSites().

00783 {
00784 if (intronOrientation(left->hEnd+1, right->hStart) == orientation)
00785     {
00786     left->hEnd += 1;
00787     left->nEnd += 1;
00788     return TRUE;
00789     }
00790 if (intronOrientation(left->hEnd, right->hStart-1) == orientation)
00791     {
00792     right->hStart -= 1;
00793     right->nStart -= 1;
00794     return TRUE;
00795     }
00796 return FALSE;
00797 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ffAli* trimFlakyEnds ( struct dnaSeq qSeq,
struct dnaSeq tSeq,
struct ffAli ffList 
) [static, read]

Definition at line 1156 of file ffSeedExtend.c.

References aPenalty(), ffAliCount(), ffIntronOrientation(), ffRightmost(), ffScoreMatch(), freeMem(), ffAli::hEnd, ffAli::hStart, ffAli::left, ffAli::nEnd, ffAli::nStart, ffAli::right, and trimGapPenalty().

Referenced by refineBundle().

01161 {
01162 int orientation = ffIntronOrientation(ffList);
01163 struct ffAli *left, *right;
01164 char *iStart, *iEnd;
01165 int blockScore, gapPenalty;
01166 
01167 /* If one or less block then don't bother. */
01168 if (ffAliCount(ffList) < 2)
01169     return ffList;
01170 
01171 /* Trim beginnings. */
01172 left = ffList;
01173 right = ffList->right;
01174 while (right != NULL)
01175     {
01176     blockScore = ffScoreMatch(left->nStart, left->hStart, 
01177         left->nEnd-left->nStart);
01178     blockScore -= aPenalty(left->nStart, left->nEnd - left->nStart);
01179     iStart = left->hEnd;
01180     iEnd = right->hStart;
01181     gapPenalty = trimGapPenalty(iEnd-iStart, 
01182         right->nStart - left->nEnd, iStart, iEnd, orientation);
01183     if (gapPenalty >= blockScore)
01184         {
01185         freeMem(left);
01186         ffList = right;
01187         right->left = NULL;
01188         }
01189     else
01190         break;
01191     left = right;
01192     right = right->right;
01193     }
01194 
01195 right = ffRightmost(ffList);
01196 if (right == ffList)
01197     return ffList;
01198 left = right->left;
01199 while (left != NULL)
01200     {
01201     blockScore = ffScoreMatch(right->nStart, right->hStart, 
01202         right->nEnd-right->nStart);
01203     blockScore -= aPenalty(right->nStart, right->nEnd - right->nStart);
01204     iStart = left->hEnd;
01205     iEnd = right->hStart;
01206     gapPenalty = trimGapPenalty(iEnd-iStart, 
01207         right->nStart - left->nEnd, iStart, iEnd, orientation);
01208     if (gapPenalty >= blockScore)
01209         {
01210         freeMem(right);
01211         left->right = NULL;
01212         }
01213     else
01214         break;
01215     right = left;
01216     left = left->left;
01217     }
01218 return ffList;
01219 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int trimGapPenalty ( int  hGap,
int  nGap,
char *  iStart,
char *  iEnd,
int  orientation 
) [static]

Definition at line 1137 of file ffSeedExtend.c.

References ffCalcGapPenalty(), ffCdna, and ffScoreIntron().

Referenced by trimFlakyEnds().

01139 {
01140 int penalty =  ffCalcGapPenalty(hGap, nGap, ffCdna);
01141 if (hGap > 2 || nGap > 2)       /* Not just a local extension. */
01142                                 /* Score gap to favor introns. */
01143     {
01144     penalty <<= 1;
01145     if (nGap > 0)       /* Intron gaps are not in n side at all. */
01146          penalty += 3;
01147                         /* Good splice sites give you bonus 2,
01148                          * bad give you penalty of six. */
01149     penalty += 6 - 2*ffScoreIntron(iStart[0], iStart[1], 
01150         iEnd[-2], iEnd[-1], orientation);
01151     }
01152 return penalty;
01153 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

char const rcsid[] = "$Id: ffSeedExtend.c,v 1.36 2006/06/22 16:24:43 kent Exp $" [static]

Definition at line 15 of file ffSeedExtend.c.


Generated on Tue Dec 25 19:22:54 2007 for blat by  doxygen 1.5.2