jkOwnLib/gfBlatLib.c File Reference

#include "common.h"
#include "net.h"
#include "linefile.h"
#include "sqlNum.h"
#include "dnaseq.h"
#include "fa.h"
#include "fuzzyFind.h"
#include "supStitch.h"
#include "genoFind.h"
#include "gfInternal.h"
#include "errabort.h"
#include "nib.h"
#include "twoBit.h"
#include "trans3.h"

Include dependency graph for gfBlatLib.c:

Go to the source code of this file.

Functions

void gfRangeFree (struct gfRange **pEl)
void gfRangeFreeList (struct gfRange **pList)
static struct gfRangegfRangeLoad (char **row)
int gfRangeCmpTarget (const void *va, const void *vb)
static void startSeqQuery (int conn, bioSeq *seq, char *type)
static void gfServerWarn (bioSeq *seq, char *warning)
static struct gfRangegfQuerySeq (int conn, struct dnaSeq *seq)
static int findTileSize (char *line)
gfHitgetHitsFromServer (int conn, struct lm *lm)
static void gfQuerySeqTrans (int conn, aaSeq *seq, struct gfClump *clumps[2][3], struct lm *lm, struct gfSeqSource **retSsList, int *retTileSize)
static void gfQuerySeqTransTrans (int conn, struct dnaSeq *seq, struct gfClump *clumps[2][3][3], struct lm *lm, struct gfSeqSource **retSsList, int *retTileSize)
gfRangegfRangesBundle (struct gfRange *exonList, int maxIntron)
static boolean alignComponents (struct gfRange *combined, struct ssBundle *bun, enum ffStringency stringency)
static int scoreAli (struct ffAli *ali, boolean isProt, enum ffStringency stringency, struct dnaSeq *tSeq, struct trans3 *t3List)
static void saveAlignments (char *chromName, int chromSize, int chromOffset, struct ssBundle *bun, struct hash *t3Hash, boolean qIsRc, boolean tIsRc, enum ffStringency stringency, int minMatch, struct gfOutput *out)
hashgfFileCacheNew ()
static void gfFileCacheFreeEl (struct hashEl *el)
void gfFileCacheFree (struct hash **pCache)
static void getTargetName (char *tSpec, boolean includeFile, char *targetName)
void gfAlignStrand (int *pConn, char *tSeqDir, struct dnaSeq *seq, boolean isRc, int minMatch, struct hash *tFileCache, struct gfOutput *out)
char * clumpTargetName (struct gfClump *clump)
static struct gfRangeseqClumpToRangeList (struct gfClump *clumpList, int frame)
static struct ssBundlegfClumpsToBundles (struct gfClump *clumpList, boolean isRc, struct dnaSeq *seq, int minScore, struct gfRange **retRangeList)
static void extendHitRight (int qMax, int tMax, char **pEndQ, char **pEndT, int(*scoreMatch)(char a, char b), int maxDown)
static void extendHitLeft (int qMax, int tMax, char **pStartQ, char **pStartT, int(*scoreMatch)(char a, char b), int maxDown)
static void clumpToHspRange (struct gfClump *clump, bioSeq *qSeq, int tileSize, int frame, struct trans3 *t3, struct gfRange **pRangeList, boolean isProt, boolean fastMap)
ssFfItemgfRangesToFfItem (struct gfRange *rangeList, aaSeq *qSeq)
static struct ssBundlefastMapClumpsToBundles (struct genoFind *gf, struct gfClump *clumpList, bioSeq *qSeq)
static void gfAlignSomeClumps (struct genoFind *gf, struct gfClump *clumpList, bioSeq *seq, boolean isRc, int minMatch, struct gfOutput *out, boolean isProt, enum ffStringency stringency)
void gfAlignAaClumps (struct genoFind *gf, struct gfClump *clumpList, aaSeq *seq, boolean isRc, int minMatch, struct gfOutput *out)
int rangeScore (struct gfRange *range, struct dnaSeq *qSeq)
void gfFindAlignAaTrans (struct genoFind *gfs[3], aaSeq *qSeq, struct hash *t3Hash, boolean tIsRc, int minMatch, struct gfOutput *out)
void rangeCoorTimes3 (struct gfRange *rangeList)
static void loadHashT3Ranges (struct gfRange *rangeList, char *tSeqDir, struct hash *tFileCache, int qSeqSize, boolean isRc, struct hash **retT3Hash, struct dnaSeq **retSeqList, struct slRef **retT3RefList)
void gfAlignTrans (int *pConn, char *tSeqDir, aaSeq *seq, int minMatch, struct hash *tFileCache, struct gfOutput *out)
void untranslateRangeList (struct gfRange *rangeList, int qFrame, int tFrame, struct hash *t3Hash, struct trans3 *t3, int tOffset)
void gfAlignTransTrans (int *pConn, char *tSeqDir, struct dnaSeq *qSeq, boolean qIsRc, int minMatch, struct hash *tFileCache, struct gfOutput *out, boolean isRna)
static struct ssBundlegfTransTransFindBundles (struct genoFind *gfs[3], struct dnaSeq *qSeq, struct hash *t3Hash, boolean isRc, int minMatch, boolean isRna)
static void addToBigBundleList (struct ssBundle **pOneList, struct hash *bunHash, struct ssBundle **pBigList, struct dnaSeq *query)
static boolean jiggleSmallExons (struct ffAli *ali, struct dnaSeq *nSeq, struct dnaSeq *hSeq)
static struct ffAlirefineSmallExons (struct ffAli *ff, struct dnaSeq *nSeq, struct dnaSeq *hSeq)
static void refineSmallExonsInBundle (struct ssBundle *bun)
void gfLongDnaInMem (struct dnaSeq *query, struct genoFind *gf, boolean isRc, int minScore, Bits *qMaskBits, struct gfOutput *out, boolean fastMap, boolean band)
void gfLongTransTransInMem (struct dnaSeq *query, struct genoFind *gfs[3], struct hash *t3Hash, boolean qIsRc, boolean tIsRc, boolean qIsRna, int minScore, struct gfOutput *out)

Variables

static char const rcsid [] = "$Id: gfBlatLib.c,v 1.25 2007/04/20 22:44:11 kent Exp $"
static int ssAliCount = 16
static int usualExpansion = 500


Function Documentation

static void addToBigBundleList ( struct ssBundle **  pOneList,
struct hash bunHash,
struct ssBundle **  pBigList,
struct dnaSeq query 
) [static]

Definition at line 1323 of file gfBlatLib.c.

References AllocVar, ssBundle::avoidFuzzyFindKludge, ssBundle::ffList, ssBundle::genoSeq, hashAdd(), hashFindVal(), ssBundle::isProt, dnaSeq::name, name, ssBundle::next, slAddHead, slCat(), and ssBundleFreeList().

01327 {
01328 struct ssBundle *oneBun, *bigBun;
01329 
01330 for (oneBun = *pOneList; oneBun != NULL; oneBun = oneBun->next)
01331     {
01332     char *name = oneBun->genoSeq->name;
01333     if ((bigBun = hashFindVal(bunHash, name)) == NULL)
01334         {
01335         AllocVar(bigBun);
01336         slAddHead(pBigList, bigBun);
01337         hashAdd(bunHash, name, bigBun);
01338         bigBun->qSeq = query;
01339         bigBun->genoSeq = oneBun->genoSeq;
01340         bigBun->isProt = oneBun->isProt;
01341         bigBun->avoidFuzzyFindKludge = oneBun->avoidFuzzyFindKludge;
01342         }
01343     bigBun->ffList = slCat(bigBun->ffList, oneBun->ffList);
01344     oneBun->ffList = NULL;
01345     }
01346 ssBundleFreeList(pOneList);
01347 }

Here is the call graph for this function:

static boolean alignComponents ( struct gfRange combined,
struct ssBundle bun,
enum ffStringency  stringency 
) [static]

Definition at line 399 of file gfBlatLib.c.

References AllocVar, gfRange::components, dnaSeq::dna, FALSE, ffFind(), ssBundle::ffList, ssBundle::genoSeq, gfRange::next, gfRange::qEnd, ssBundle::qSeq, gfRange::qStart, slAddHead, gfRange::tEnd, TRUE, and gfRange::tStart.

Referenced by gfAlignStrand(), and gfClumpsToBundles().

00403 {
00404 struct gfRange *range;
00405 struct dnaSeq *qSeq = bun->qSeq, *tSeq = bun->genoSeq;
00406 struct ssFfItem *ffi;
00407 struct ffAli *ali;
00408 int qStart, qEnd, tStart, tEnd;
00409 int extra = 250;
00410 boolean gotAny = FALSE;
00411 
00412 for (range = combined->components; range != NULL; range = range->next)
00413     {
00414     /* Expand to include some extra sequence around range. */
00415     qStart = range->qStart - extra;
00416     tStart = range->tStart - extra;
00417     qEnd = range->qEnd + extra;
00418     tEnd = range->tEnd + extra;
00419     if (range == combined->components)
00420         {
00421         qStart -= extra;
00422         tStart -= extra;
00423         }
00424     if (range->next == NULL)
00425         {
00426         qEnd += extra;
00427         tEnd += extra;
00428         }
00429     if (qStart < combined->qStart) qStart = combined->qStart;
00430     if (tStart < combined->tStart) tStart = combined->tStart;
00431     if (qEnd > combined->qEnd) qEnd = combined->qEnd;
00432     if (tEnd > combined->tEnd) tEnd = combined->tEnd;
00433     ali = ffFind(qSeq->dna + qStart,
00434                  qSeq->dna + qEnd,
00435                  tSeq->dna + tStart - combined->tStart,
00436                  tSeq->dna + tEnd - combined->tStart,
00437                  stringency);
00438     if (ali != NULL)
00439         {
00440         AllocVar(ffi);
00441         ffi->ff = ali;
00442         slAddHead(&bun->ffList, ffi);
00443         gotAny = TRUE;
00444         }
00445     }
00446 return gotAny;
00447 }

Here is the call graph for this function:

Here is the caller graph for this function:

char* clumpTargetName ( struct gfClump clump  ) 

Definition at line 587 of file gfBlatLib.c.

References gfSeqSource::fileName, internalErr, dnaSeq::name, name, gfSeqSource::seq, and gfClump::target.

Referenced by gfAlignTrans(), gfAlignTransTrans(), and seqClumpToRangeList().

00589 {
00590 struct gfSeqSource *target = clump->target;
00591 char *name;
00592 if (target->seq != NULL)
00593     name = target->seq->name;
00594 else
00595     name = target->fileName;
00596 if (name == NULL)
00597     internalErr();
00598 return name;
00599 }

Here is the caller graph for this function:

static void clumpToHspRange ( struct gfClump clump,
bioSeq qSeq,
int  tileSize,
int  frame,
struct trans3 t3,
struct gfRange **  pRangeList,
boolean  isProt,
boolean  fastMap 
) [static]

Definition at line 718 of file gfBlatLib.c.

References aaScore2(), AllocVar, cloneString(), dnaSeq::dna, dnaScore2(), extendHitLeft(), extendHitRight(), FALSE, gfClump::hitList, internalErr, dnaSeq::name, gfHit::next, gfRange::qEnd, gfHit::qStart, gfSeqSource::seq, dnaSeq::size, slAddHead, gfSeqSource::start, gfRange::t3, gfClump::target, gfRange::tEnd, TRUE, gfRange::tSeq, gfRange::tStart, and gfHit::tStart.

Referenced by fastMapClumpsToBundles(), gfAlignSomeClumps(), gfAlignTrans(), gfAlignTransTrans(), gfFindAlignAaTrans(), and gfTransTransFindBundles().

00724 {
00725 struct gfSeqSource *target = clump->target;
00726 aaSeq *tSeq = target->seq;
00727 BIOPOL *qs, *ts, *qe, *te;
00728 struct gfHit *hit;
00729 int qStart = 0, tStart = 0, qEnd = 0, tEnd = 0, newQ = 0, newT = 0;
00730 boolean outOfIt = TRUE;         /* Logically outside of a clump. */
00731 struct gfRange *range;
00732 BIOPOL *lastQs = NULL, *lastQe = NULL, *lastTs = NULL, *lastTe = NULL;
00733 int (*scoreMatch)(char a, char b) = (isProt ? aaScore2 : dnaScore2);
00734 int maxDown, minSpan;
00735 
00736 if (fastMap)
00737     {
00738     maxDown = 1;
00739     minSpan = 50;
00740     }
00741 else
00742     {
00743     maxDown = 10;
00744     minSpan = 0;
00745     }
00746 
00747 
00748 if (tSeq == NULL)
00749     internalErr();
00750 
00751 /* The termination condition of this loop is a little complicated.
00752  * We want to output something either when the next hit can't be
00753  * merged into the previous, or at the end of the list.  To avoid
00754  * duplicating the output code we're forced to complicate the loop
00755  * termination logic.  Hence the check for hit == NULL to break
00756  * the loop is not until near the end of the loop. */
00757 for (hit = clump->hitList; ; hit = hit->next)
00758     {
00759     if (hit != NULL)
00760         {
00761         newQ = hit->qStart;
00762         newT = hit->tStart - target->start;
00763         }
00764 
00765     /* See if it's time to output merged (diagonally adjacent) hits. */
00766     if (!outOfIt)       /* Not first time through. */
00767         {
00768         /* As a micro-optimization handle strings of adjacent hits
00769          * specially.  Don't do the extensions until we've merged
00770          * all adjacent hits. */
00771         if (hit == NULL || newQ != qEnd || newT != tEnd)
00772             {
00773             qs = qSeq->dna + qStart;
00774             ts = tSeq->dna + tStart;
00775             qe = qSeq->dna + qEnd;
00776             te = tSeq->dna + tEnd;
00777             extendHitRight(qSeq->size - qEnd, tSeq->size - tEnd,
00778                 &qe, &te, scoreMatch, maxDown);
00779             extendHitLeft(qStart, tStart, &qs, &ts, scoreMatch, maxDown);
00780             if (qs != lastQs || ts != lastTs || qe != lastQe || qs !=  lastQs)
00781                 {
00782                 lastQs = qs;
00783                 lastTs = ts;
00784                 lastQe = qe;
00785                 lastTe = te;
00786                 if (qe - qs >= minSpan)
00787                     {
00788                     AllocVar(range);
00789                     range->qStart = qs - qSeq->dna;
00790                     range->qEnd = qe - qSeq->dna;
00791                     range->tName = cloneString(tSeq->name);
00792                     range->tSeq = tSeq;
00793                     range->tStart = ts - tSeq->dna;
00794                     range->tEnd = te - tSeq->dna;
00795                     range->hitCount = qe - qs;
00796                     range->frame = frame;
00797                     range->t3 = t3;
00798                     assert(range->tEnd <= tSeq->size);
00799                     slAddHead(pRangeList, range);
00800                     }
00801                 }
00802             outOfIt = TRUE;
00803             }
00804         }
00805     if (hit == NULL)
00806         break;
00807 
00808     if (outOfIt)
00809         {
00810         qStart = newQ;
00811         qEnd = qStart + tileSize;
00812         tStart = newT;
00813         tEnd = tStart + tileSize;
00814         outOfIt = FALSE;
00815         }
00816     else
00817         {
00818         qEnd = newQ + tileSize;
00819         tEnd = newT + tileSize;
00820         }
00821     }
00822 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void extendHitLeft ( int  qMax,
int  tMax,
char **  pStartQ,
char **  pStartT,
int(*)(char a, char b)  scoreMatch,
int  maxDown 
) [static]

Definition at line 688 of file gfBlatLib.c.

References min.

Referenced by clumpToHspRange().

00692 {
00693 int maxScore = 0;
00694 int score = 0;
00695 int maxPos = 0;
00696 int last = -min(qMax, tMax);
00697 int i;
00698 char *q = *pStartQ, *t = *pStartT;
00699 
00700 for (i=-1; i>=last; --i)
00701     {
00702     score += scoreMatch(q[i], t[i]);
00703     if (score > maxScore)
00704          {
00705          maxScore = score;
00706          maxPos = i;
00707          }
00708     else if (i < maxPos - maxDown)
00709          {
00710          break;
00711          }
00712     }
00713 *pStartQ = q+maxPos;
00714 *pStartT = t+maxPos;
00715 }

Here is the caller graph for this function:

static void extendHitRight ( int  qMax,
int  tMax,
char **  pEndQ,
char **  pEndT,
int(*)(char a, char b)  scoreMatch,
int  maxDown 
) [static]

Definition at line 659 of file gfBlatLib.c.

References min.

Referenced by clumpToHspRange().

00663 {
00664 int maxScore = 0;
00665 int score = 0;
00666 int maxPos = -1;
00667 int last = min(qMax, tMax);
00668 int i;
00669 char *q = *pEndQ, *t = *pEndT;
00670 
00671 for (i=0; i<last; ++i)
00672     {
00673     score += scoreMatch(q[i], t[i]);
00674     if (score > maxScore)
00675          {
00676          maxScore = score;
00677          maxPos = i;
00678          }
00679     else if (i > maxPos + maxDown)
00680          {
00681          break;
00682          }
00683     }
00684 *pEndQ = q+maxPos+1;
00685 *pEndT = t+maxPos+1;
00686 }

Here is the caller graph for this function:

static struct ssBundle* fastMapClumpsToBundles ( struct genoFind gf,
struct gfClump clumpList,
bioSeq qSeq 
) [static, read]

Definition at line 849 of file gfBlatLib.c.

References AllocVar, clumpToHspRange(), FALSE, gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), gfClump::next, ssBundle::qSeq, rangeList, slAddHead, slReverse(), slSort(), genoFind::tileSize, and TRUE.

00851 {
00852 struct gfClump *clump;
00853 struct gfRange *rangeList = NULL, *range;
00854 bioSeq *targetSeq;
00855 struct ssBundle *bunList = NULL, *bun;
00856 
00857 for (clump = clumpList; clump != NULL; clump = clump->next)
00858     clumpToHspRange(clump, qSeq, gf->tileSize, 0, NULL, &rangeList, FALSE, TRUE);
00859 slReverse(&rangeList);
00860 slSort(&rangeList, gfRangeCmpTarget);
00861 rangeList = gfRangesBundle(rangeList, 256);
00862 for (range = rangeList; range != NULL; range = range->next)
00863     {
00864     targetSeq = range->tSeq;
00865     AllocVar(bun);
00866     bun->qSeq = qSeq;
00867     bun->genoSeq = targetSeq;
00868     bun->ffList = gfRangesToFfItem(range->components, qSeq);
00869     bun->isProt = FALSE;
00870     slAddHead(&bunList, bun);
00871     }
00872 gfRangeFreeList(&rangeList);
00873 return bunList;
00874 }

Here is the call graph for this function:

static int findTileSize ( char *  line  )  [static]

Definition at line 162 of file gfBlatLib.c.

References internalErr, nextWord(), sameString, and tileSize.

Referenced by gfQuerySeqTrans(), and gfQuerySeqTransTrans().

00164 {
00165 char *var, *val;
00166 int tileSize = 4;
00167 for (;;)
00168     {
00169     var = nextWord(&line);
00170     if (var == NULL)
00171          break;
00172     val = nextWord(&line);
00173     if (val == NULL)
00174          {
00175          internalErr();
00176          break;
00177          }
00178     if (sameString("tileSize", var))
00179          {
00180          tileSize = atoi(val);
00181          if (tileSize <= 0)
00182              internalErr();
00183          }
00184     }
00185 return tileSize;
00186 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfHit* getHitsFromServer ( int  conn,
struct lm lm 
) [read]

Definition at line 188 of file gfBlatLib.c.

References freez(), internalErr, lm, lmAllocVar, netRecieveLongString(), nextWord(), slAddHead, slReverse(), and sqlUnsigned().

Referenced by gfQuerySeqTrans(), and gfQuerySeqTransTrans().

00190 {
00191 char *s, *line, *q, *t;
00192 struct gfHit *hitList = NULL, *hit;
00193 s = line = netRecieveLongString(conn);
00194 for (;;)
00195     {
00196     if ((q = nextWord(&line)) == NULL)
00197          break;
00198     if ((t = nextWord(&line)) == NULL)
00199          internalErr();
00200     lmAllocVar(lm, hit);
00201     hit->qStart = sqlUnsigned(q);
00202     hit->tStart = sqlUnsigned(t);
00203     slAddHead(&hitList, hit);
00204     }
00205 freez(&s);
00206 slReverse(&hitList);
00207 return hitList;
00208 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void getTargetName ( char *  tSpec,
boolean  includeFile,
char *  targetName 
) [static]

Definition at line 537 of file gfBlatLib.c.

References gfiGetSeqName(), PATH_LEN, and safef().

Referenced by gfAlignStrand(), gfAlignTrans(), and gfAlignTransTrans().

00538                                                  : in targetName. */
00539 {
00540 if (includeFile)
00541     {
00542     char seqName[128];
00543     char fileName[PATH_LEN];
00544     gfiGetSeqName(tSpec, seqName, fileName);
00545     safef(targetName, PATH_LEN, "%s:%s", fileName, seqName);
00546     }
00547 else
00548     gfiGetSeqName(tSpec, targetName, NULL);
00549 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfAlignAaClumps ( struct genoFind gf,
struct gfClump clumpList,
aaSeq seq,
boolean  isRc,
int  minMatch,
struct gfOutput out 
)

Definition at line 913 of file gfBlatLib.c.

References ffTight, gfAlignSomeClumps(), and TRUE.

Referenced by searchOneProt().

00916 {
00917 gfAlignSomeClumps(gf, clumpList, seq, isRc, minMatch, out, TRUE, ffTight);
00918 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void gfAlignSomeClumps ( struct genoFind gf,
struct gfClump clumpList,
bioSeq seq,
boolean  isRc,
int  minMatch,
struct gfOutput out,
boolean  isProt,
enum ffStringency  stringency 
) [static]

Definition at line 876 of file gfBlatLib.c.

References AllocVar, clumpToHspRange(), FALSE, ffIntronMax, ssBundle::ffList, ssBundle::genoSeq, gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), ssBundle::isProt, dnaSeq::name, gfClump::next, ssBundle::qSeq, rangeList, saveAlignments(), dnaSeq::size, slReverse(), slSort(), ssAliCount, ssBundleFree(), ssStitch(), and genoFind::tileSize.

Referenced by gfAlignAaClumps().

00881 {
00882 struct gfClump *clump;
00883 struct gfRange *rangeList = NULL, *range;
00884 bioSeq *targetSeq;
00885 struct ssBundle *bun;
00886 int intronMax = ffIntronMax;
00887 
00888 if (isProt)
00889     intronMax /= 3;
00890 for (clump = clumpList; clump != NULL; clump = clump->next)
00891     {
00892     clumpToHspRange(clump, seq, gf->tileSize, 0, NULL, &rangeList, isProt, FALSE);
00893     }
00894 slReverse(&rangeList);
00895 slSort(&rangeList, gfRangeCmpTarget);
00896 rangeList = gfRangesBundle(rangeList, intronMax);
00897 for (range = rangeList; range != NULL; range = range->next)
00898     {
00899     targetSeq = range->tSeq;
00900     AllocVar(bun);
00901     bun->qSeq = seq;
00902     bun->genoSeq = targetSeq;
00903     bun->ffList = gfRangesToFfItem(range->components, seq);
00904     bun->isProt = isProt;
00905     ssStitch(bun, stringency, minMatch, ssAliCount);
00906     saveAlignments(targetSeq->name, targetSeq->size, 0, 
00907         bun, NULL, isRc, FALSE, stringency, minMatch, out);
00908     ssBundleFree(&bun);
00909     }
00910 gfRangeFreeList(&rangeList);
00911 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfAlignStrand ( int *  pConn,
char *  tSeqDir,
struct dnaSeq seq,
boolean  isRc,
int  minMatch,
struct hash tFileCache,
struct gfOutput out 
)

Definition at line 552 of file gfBlatLib.c.

References alignComponents(), AllocVar, FALSE, ffCdna, ffIntronMax, freeDnaSeq(), getTargetName(), gfiExpandAndLoadCached(), gfQuerySeq(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfOutput::includeTargetFile, range::next, PATH_LEN, rangeList, saveAlignments(), dnaSeq::size, slSort(), ssAliCount, ssBundleFree(), and ssStitch().

Referenced by doBlat(), and gfClient().

00557 {
00558 struct ssBundle *bun;
00559 struct gfRange *rangeList = NULL, *range;
00560 struct dnaSeq *targetSeq;
00561 char targetName[PATH_LEN];
00562 
00563 rangeList = gfQuerySeq(*pConn, seq);
00564 close(*pConn);
00565 *pConn = -1;
00566 slSort(&rangeList, gfRangeCmpTarget);
00567 /* Dump range list to file for debugging. */
00568 rangeList = gfRangesBundle(rangeList, ffIntronMax);
00569 for (range = rangeList; range != NULL; range = range->next)
00570     {
00571     getTargetName(range->tName, out->includeTargetFile, targetName);
00572     targetSeq = gfiExpandAndLoadCached(range, tFileCache, tSeqDir, 
00573         seq->size, &range->tTotalSize, FALSE, FALSE, usualExpansion);
00574     AllocVar(bun);
00575     bun->qSeq = seq;
00576     bun->genoSeq = targetSeq;
00577     alignComponents(range, bun, ffCdna);
00578     ssStitch(bun, ffCdna, minMatch, ssAliCount);
00579     saveAlignments(targetName, range->tTotalSize, range->tStart, 
00580         bun, NULL, isRc, FALSE, ffCdna, minMatch, out);
00581     ssBundleFree(&bun);
00582     freeDnaSeq(&targetSeq);
00583     }
00584 gfRangeFreeList(&rangeList);
00585 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfAlignTrans ( int *  pConn,
char *  tSeqDir,
aaSeq seq,
int  minMatch,
struct hash tFileCache,
struct gfOutput out 
)

Definition at line 1042 of file gfBlatLib.c.

References AllocVar, clumpTargetName(), clumpToHspRange(), trans3::end, FALSE, ffCdna, ffIntronMax, ssBundle::ffList, freeDnaSeqList(), freeHash(), freeMem(), ssBundle::genoSeq, getTargetName(), gfClumpFreeList(), gfQuerySeqTrans(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), hashMustFindVal(), gfOutput::includeTargetFile, ssBundle::isProt, lm, lmCleanup(), lmInit(), loadHashT3Ranges(), gfClump::next, slRef::next, gfSeqSource::next, trans3::nibSize, PATH_LEN, ssBundle::qSeq, rangeCoorTimes3(), rangeList, saveAlignments(), trans3::seq, seqClumpToRangeList(), dnaSeq::size, slCat(), slFreeList(), slReverse(), slSort(), ss, ssAliCount, ssBundleFree(), ssStitch(), trans3::start, ssBundle::t3List, gfClump::target, gfClump::tEnd, tileSize, trans3::trans, trans3Find(), trans3Free(), TRUE, and gfClump::tStart.

Referenced by doBlat(), and gfClient().

01047 {
01048 struct ssBundle *bun;
01049 struct gfClump *clumps[2][3], *clump;
01050 struct gfRange *rangeList = NULL, *range, *rl;
01051 struct dnaSeq *targetSeq, *tSeqList = NULL;
01052 char targetName[PATH_LEN];
01053 int tileSize;
01054 int frame, isRc = 0;
01055 struct hash *t3Hash = NULL;
01056 struct slRef *t3RefList = NULL, *ref;
01057 struct gfSeqSource *ssList = NULL, *ss;
01058 struct trans3 *t3;
01059 struct lm *lm = lmInit(0);
01060 
01061 /* Get clumps from server. */
01062 gfQuerySeqTrans(*pConn, seq, clumps, lm, &ssList, &tileSize);
01063 close(*pConn);
01064 *pConn = -1;
01065 
01066 for (isRc = 0; isRc <= 1;  ++isRc)
01067     {
01068     /* Figure out which parts of sequence we need to load. */
01069     for (frame = 0; frame < 3; ++frame)
01070         {
01071         rl = seqClumpToRangeList(clumps[isRc][frame], frame);
01072         rangeList = slCat(rangeList, rl);
01073         }
01074     /* Convert from amino acid to nucleotide coordinates. */
01075     rangeCoorTimes3(rangeList);
01076     slSort(&rangeList, gfRangeCmpTarget);
01077     rangeList = gfRangesBundle(rangeList, ffIntronMax);
01078     loadHashT3Ranges(rangeList, tSeqDir, tFileCache, seq->size, 
01079         isRc, &t3Hash, &tSeqList, &t3RefList);
01080 
01081     /* The old range list was not very precise - it was just to get
01082      * the DNA loaded.  */
01083     gfRangeFreeList(&rangeList);
01084 
01085 
01086     /* Patch up clump list and associated sequence source to refer
01087      * to bits of genome loaded into memory.  Create new range list
01088      * by extending hits in clumps. */
01089     for (frame = 0; frame < 3; ++frame)
01090         {
01091         for (clump = clumps[isRc][frame]; clump != NULL; clump = clump->next)
01092             {
01093             struct gfSeqSource *ss = clump->target;
01094             t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3);
01095             ss->seq = t3->trans[frame];
01096             ss->start = t3->start/3;
01097             ss->end = t3->end/3;
01098             clumpToHspRange(clump, seq, tileSize, frame, t3, &rangeList, TRUE, FALSE);
01099             }
01100         }
01101     slReverse(&rangeList);
01102     slSort(&rangeList, gfRangeCmpTarget);
01103     rangeList = gfRangesBundle(rangeList, ffIntronMax/3);
01104 
01105     /* Do detailed alignment of each of the clustered ranges. */
01106     for (range = rangeList; range != NULL; range = range->next)
01107         {
01108         targetSeq = range->tSeq;
01109         AllocVar(bun);
01110         bun->qSeq = seq;
01111         bun->genoSeq = targetSeq;
01112         bun->ffList = gfRangesToFfItem(range->components, seq);
01113         bun->isProt = TRUE;
01114         t3 = hashMustFindVal(t3Hash, range->tName);
01115         bun->t3List = t3;
01116         ssStitch(bun, ffCdna, minMatch, ssAliCount);
01117         getTargetName(range->tName, out->includeTargetFile, targetName);
01118         saveAlignments(targetName, t3->nibSize, 0, 
01119             bun, t3Hash, FALSE, isRc, ffCdna, minMatch, out);
01120         ssBundleFree(&bun);
01121         }
01122 
01123     /* Cleanup for this strand of database. */
01124     gfRangeFreeList(&rangeList);
01125     freeHash(&t3Hash);
01126     for (ref = t3RefList; ref != NULL; ref = ref->next)
01127         {
01128         struct trans3 *t3 = ref->val;
01129         trans3Free(&t3);
01130         }
01131     slFreeList(&t3RefList);
01132     freeDnaSeqList(&tSeqList);
01133     }
01134 
01135 /* Final cleanup. */
01136 for (isRc=0; isRc<=1; ++isRc)
01137     for (frame=0; frame<3; ++frame)
01138         gfClumpFreeList(&clumps[isRc][frame]);
01139 for (ss = ssList; ss != NULL; ss = ss->next)
01140     freeMem(ss->fileName);
01141 slFreeList(&ssList);
01142 lmCleanup(&lm);
01143 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfAlignTransTrans ( int *  pConn,
char *  tSeqDir,
struct dnaSeq qSeq,
boolean  qIsRc,
int  minMatch,
struct hash tFileCache,
struct gfOutput out,
boolean  isRna 
)

Definition at line 1163 of file gfBlatLib.c.

References AllocVar, clumpTargetName(), clumpToHspRange(), FALSE, ffCdna, ffIntronMax, ffLoose, freeDnaSeqList(), freeHash(), freeMem(), getTargetName(), gfQuerySeqTransTrans(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), gfOutput::includeTargetFile, lm, lmCleanup(), lmInit(), loadHashT3Ranges(), gfClump::next, slRef::next, gfSeqSource::next, PATH_LEN, rangeCoorTimes3(), rangeList, saveAlignments(), trans3::seq, seqClumpToRangeList(), dnaSeq::size, slCat(), slFreeList(), slReverse(), slSort(), ss, ssAliCount, ssBundleFree(), ssStitch(), gfClump::target, gfClump::tEnd, tileSize, trans3::trans, trans3Find(), trans3Free(), trans3New(), TRUE, gfClump::tStart, and untranslateRangeList().

Referenced by doBlat(), and gfClient().

01170 {
01171 struct gfClump *clumps[2][3][3], *clump;
01172 char targetName[PATH_LEN];
01173 int qFrame, tFrame, tIsRc;
01174 struct gfSeqSource *ssList = NULL, *ss;
01175 struct lm *lm = lmInit(0);
01176 int tileSize;
01177 struct gfRange *rangeList = NULL, *rl, *range;
01178 struct trans3 *qTrans = trans3New(qSeq), *t3;
01179 struct slRef *t3RefList = NULL, *t3Ref;
01180 struct hash *t3Hash = NULL;
01181 struct dnaSeq *tSeqList = NULL;
01182 enum ffStringency stringency = (isRna ? ffCdna : ffLoose);
01183 
01184 /* Query server for clumps. */
01185 gfQuerySeqTransTrans(*pConn, qSeq, clumps, lm, &ssList, &tileSize);
01186 close(*pConn);
01187 *pConn = -1;
01188 
01189 for (tIsRc=0; tIsRc <= 1; ++tIsRc)
01190     {
01191     /* Figure out which ranges need to be loaded and load them. */
01192     for (qFrame = 0; qFrame < 3; ++qFrame)
01193         {
01194         for (tFrame = 0; tFrame < 3; ++tFrame)
01195             {
01196             rl = seqClumpToRangeList(clumps[tIsRc][qFrame][tFrame], tFrame);
01197             rangeList = slCat(rangeList, rl);
01198             }
01199         }
01200     rangeCoorTimes3(rangeList);
01201     slSort(&rangeList, gfRangeCmpTarget);
01202     rangeList = gfRangesBundle(rangeList, ffIntronMax);
01203     loadHashT3Ranges(rangeList, tSeqDir, tFileCache,
01204         qSeq->size/3, tIsRc, &t3Hash, &tSeqList, &t3RefList);
01205 
01206     /* The old range list was not very precise - it was just to get
01207      * the DNA loaded.  */
01208     gfRangeFreeList(&rangeList);
01209 
01210     /* Patch up clump list and associated sequence source to refer
01211      * to bits of genome loaded into memory.  Create new range list
01212      * by extending hits in clumps. */
01213     for (qFrame = 0; qFrame < 3; ++qFrame)
01214         {
01215         for (tFrame = 0; tFrame < 3; ++tFrame)
01216             {
01217             for (clump = clumps[tIsRc][qFrame][tFrame]; clump != NULL; clump = clump->next)
01218                 {
01219                 struct gfSeqSource *ss = clump->target;
01220                 struct gfRange *rangeSet = NULL;
01221                 t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3);
01222                 ss->seq = t3->trans[tFrame];
01223                 ss->start = t3->start/3;
01224                 ss->end = t3->end/3;
01225                 clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, t3, &rangeSet, TRUE, FALSE);
01226                 untranslateRangeList(rangeSet, qFrame, tFrame, NULL, t3, t3->start);
01227                 rangeList = slCat(rangeSet, rangeList);
01228                 }
01229             }
01230         }
01231     slReverse(&rangeList);
01232     slSort(&rangeList, gfRangeCmpTarget);
01233     rangeList = gfRangesBundle(rangeList, ffIntronMax);
01234 
01235     for (range = rangeList; range != NULL; range = range->next)
01236         {
01237         struct dnaSeq *targetSeq = range->tSeq;
01238         struct ssBundle *bun;
01239 
01240         AllocVar(bun);
01241         bun->qSeq = qSeq;
01242         bun->genoSeq = targetSeq;
01243         bun->ffList = gfRangesToFfItem(range->components, qSeq);
01244         ssStitch(bun, stringency, minMatch, ssAliCount);
01245         getTargetName(range->tName, out->includeTargetFile, targetName);
01246         t3 = range->t3;
01247         saveAlignments(targetName, t3->nibSize, t3->start, 
01248             bun, NULL, qIsRc, tIsRc, stringency, minMatch, out);
01249         ssBundleFree(&bun);
01250         }
01251 
01252     /* Cleanup for this strand of database. */
01253     gfRangeFreeList(&rangeList);
01254     freeHash(&t3Hash);
01255     for (t3Ref = t3RefList; t3Ref != NULL; t3Ref = t3Ref->next)
01256         {
01257         struct trans3 *t3 = t3Ref->val;
01258         trans3Free(&t3);
01259         }
01260     slFreeList(&t3RefList);
01261     freeDnaSeqList(&tSeqList);
01262     }
01263 trans3Free(&qTrans);
01264 for (ss = ssList; ss != NULL; ss = ss->next)
01265     freeMem(ss->fileName);
01266 slFreeList(&ssList);
01267 lmCleanup(&lm);
01268 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ssBundle* gfClumpsToBundles ( struct gfClump clumpList,
boolean  isRc,
struct dnaSeq seq,
int  minScore,
struct gfRange **  retRangeList 
) [static, read]

Definition at line 627 of file gfBlatLib.c.

References alignComponents(), AllocVar, FALSE, ffCdna, ssBundle::genoSeq, gfiExpandRange(), gfRangeCmpTarget(), gfRangesBundle(), range::next, ssBundle::qSeq, rangeList, seqClumpToRangeList(), dnaSeq::size, slAddHead, slReverse(), slSort(), ssAliCount, and ssStitch().

00631 {
00632 struct ssBundle *bun, *bunList = NULL;
00633 struct gfRange *rangeList = NULL, *range;
00634 struct dnaSeq *targetSeq;
00635 
00636 rangeList = seqClumpToRangeList(clumpList, 0);
00637 slSort(&rangeList, gfRangeCmpTarget);
00638 rangeList = gfRangesBundle(rangeList, 2000);
00639 for (range = rangeList; range != NULL; range = range->next)
00640     {
00641     targetSeq = range->tSeq;
00642     gfiExpandRange(range, seq->size, targetSeq->size, FALSE, isRc, 
00643         usualExpansion);
00644     range->tStart = 0;
00645     range->tEnd = targetSeq->size;
00646     AllocVar(bun);
00647     bun->qSeq = seq;
00648     bun->genoSeq = targetSeq;
00649     alignComponents(range, bun, ffCdna);
00650     ssStitch(bun, ffCdna, minScore, ssAliCount);
00651     slAddHead(&bunList, bun);
00652     }
00653 slReverse(&bunList);
00654 *retRangeList = rangeList;
00655 return bunList;
00656 }

Here is the call graph for this function:

void gfFileCacheFree ( struct hash **  pCache  ) 

Definition at line 526 of file gfBlatLib.c.

References gfFileCacheFreeEl(), hashFree, and hashTraverseEls().

Referenced by gfClient(), and gfPcrViaNet().

00528 {
00529 struct hash *cache = *pCache;
00530 if (cache != NULL)
00531     {
00532     hashTraverseEls(cache, gfFileCacheFreeEl);
00533     hashFree(pCache);
00534     }
00535 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void gfFileCacheFreeEl ( struct hashEl el  )  [static]

Definition at line 509 of file gfBlatLib.c.

References hashEl::name, name, nibInfoFree(), nibIsFile(), twoBitClose(), and hashEl::val.

Referenced by gfFileCacheFree().

00511 {
00512 char *name = el->name;
00513 if (nibIsFile(name))
00514     {
00515     struct nibInfo *nib = el->val;
00516     nibInfoFree(&nib);
00517     }
00518 else
00519     {
00520     struct twoBitFile *tbf = el->val;
00521     twoBitClose(&tbf);
00522     }
00523 el->val = NULL;
00524 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct hash* gfFileCacheNew (  )  [read]

Definition at line 503 of file gfBlatLib.c.

References hashNew.

Referenced by doBlat(), gfClient(), and gfPcrViaNet().

00505 {
00506 return hashNew(0);
00507 }

Here is the caller graph for this function:

void gfFindAlignAaTrans ( struct genoFind gfs[3],
aaSeq qSeq,
struct hash t3Hash,
boolean  tIsRc,
int  minMatch,
struct gfOutput out 
)

Definition at line 938 of file gfBlatLib.c.

References AllocVar, clumpToHspRange(), FALSE, ffCdna, ffIntronMax, ssBundle::ffList, ssBundle::genoSeq, gfClumpFreeList(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), gfTransFindClumps(), hashMustFindVal(), ssBundle::isProt, lm, lmCleanup(), lmInit(), dnaSeq::name, gfClump::next, ssBundle::qSeq, rangeList, saveAlignments(), trans3::seq, dnaSeq::size, slReverse(), slSort(), ssAliCount, ssBundleFree(), ssStitch(), ssBundle::t3List, tileSize, genoFind::tileSize, and TRUE.

Referenced by tripleSearch().

00942 {
00943 struct gfClump *clumps[3];
00944 int frame;
00945 struct gfClump *clump;
00946 struct gfRange *rangeList = NULL, *range;
00947 aaSeq *targetSeq;
00948 struct ssBundle *bun;
00949 int tileSize = gfs[0]->tileSize;
00950 struct trans3 *t3;
00951 int hitCount;
00952 struct lm *lm = lmInit(0);
00953 
00954 gfTransFindClumps(gfs, qSeq, clumps, lm, &hitCount);
00955 for (frame=0; frame<3; ++frame)
00956     {
00957     for (clump = clumps[frame]; clump != NULL; clump = clump->next)
00958         {
00959         clumpToHspRange(clump, qSeq, tileSize, frame, NULL, &rangeList, TRUE, FALSE);
00960         }
00961     }
00962 slReverse(&rangeList);
00963 slSort(&rangeList, gfRangeCmpTarget);
00964 rangeList = gfRangesBundle(rangeList, ffIntronMax/3);
00965 for (range = rangeList; range != NULL; range = range->next)
00966     {
00967     targetSeq = range->tSeq;
00968     t3 = hashMustFindVal(t3Hash, targetSeq->name);
00969     AllocVar(bun);
00970     bun->qSeq = qSeq;
00971     bun->genoSeq = targetSeq;
00972     bun->ffList = gfRangesToFfItem(range->components, qSeq);
00973     bun->isProt = TRUE;
00974     bun->t3List = t3;
00975     ssStitch(bun, ffCdna, minMatch, ssAliCount);
00976     saveAlignments(targetSeq->name, t3->seq->size, 0, 
00977         bun, t3Hash, FALSE, tIsRc, ffCdna, minMatch, out);
00978     ssBundleFree(&bun);
00979     }
00980 gfRangeFreeList(&rangeList);
00981 for (frame=0; frame<3; ++frame)
00982     gfClumpFreeList(&clumps[frame]);
00983 lmCleanup(&lm);
00984 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfLongDnaInMem ( struct dnaSeq query,
struct genoFind gf,
boolean  isRc,
int  minScore,
Bits qMaskBits,
struct gfOutput out,
boolean  fastMap,
boolean  band 
)

Definition at line 1478 of file gfBlatLib.c.

References lmInit(), newHash(), rangeList, and dnaSeq::size.

Referenced by searchOneStrand().

01483 {
01484 int hitCount;
01485 int maxSize = 5000;
01486 int preferredSize = 4500;
01487 int overlapSize = 250;
01488 struct dnaSeq subQuery = *query;
01489 struct lm *lm = lmInit(0);
01490 int subOffset, subSize, nextOffset;
01491 DNA saveEnd, *endPos;
01492 struct ssBundle *oneBunList = NULL, *bigBunList = NULL, *bun;
01493 struct hash *bunHash = newHash(8);
01494 
01495 for (subOffset = 0; subOffset<query->size; subOffset = nextOffset)
01496     {
01497     struct gfClump *clumpList;
01498     struct gfRange *rangeList = NULL;
01499 
01500     /* Figure out size of this piece.  If query is
01501      * maxSize or less do it all.   Otherwise just
01502      * do prefered size, and set it up to overlap
01503      * with surrounding pieces by overlapSize.  */
01504     if (subOffset == 0 && query->size <= maxSize)
01505         nextOffset = subSize = query->size;
01506     else
01507         {
01508         subSize = preferredSize;
01509         if (subSize + subOffset >= query->size)
01510             {
01511             subSize = query->size - subOffset;
01512             nextOffset = query->size;
01513             }
01514         else
01515             {
01516             nextOffset = subOffset + preferredSize - overlapSize;
01517             }
01518         }
01519     subQuery.dna = query->dna + subOffset;
01520     subQuery.size = subSize;
01521     endPos = &subQuery.dna[subSize];
01522     saveEnd = *endPos;
01523     *endPos = 0;
01524     if (band)
01525         {
01526         oneBunList = ffSeedExtInMem(gf, &subQuery, qMaskBits, subOffset, lm, minScore, isRc);
01527         }
01528     else
01529         {
01530         clumpList = gfFindClumpsWithQmask(gf, &subQuery, qMaskBits, subOffset, lm, &hitCount);
01531         if (fastMap)
01532             {
01533             oneBunList = fastMapClumpsToBundles(gf, clumpList, &subQuery);
01534             }
01535         else
01536             {
01537             oneBunList = gfClumpsToBundles(clumpList, isRc, &subQuery, minScore, &rangeList);
01538             gfRangeFreeList(&rangeList);
01539             }
01540         gfClumpFreeList(&clumpList);
01541         }
01542     addToBigBundleList(&oneBunList, bunHash, &bigBunList, query);
01543     *endPos = saveEnd;
01544     }
01545 #ifdef DEBUG
01546 dumpBunList(bigBunList);
01547 #endif /* DEBUG */
01548 for (bun = bigBunList; bun != NULL; bun = bun->next)
01549     {
01550     ssStitch(bun, ffCdna, minScore, ssAliCount);
01551     if (!fastMap && !band)
01552         refineSmallExonsInBundle(bun);
01553     saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 
01554         bun, NULL, isRc, FALSE, ffCdna, minScore, out);
01555     }
01556 ssBundleFreeList(&bigBunList);
01557 freeHash(&bunHash);
01558 lmCleanup(&lm);
01559 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfLongTransTransInMem ( struct dnaSeq query,
struct genoFind gfs[3],
struct hash t3Hash,
boolean  qIsRc,
boolean  tIsRc,
boolean  qIsRna,
int  minScore,
struct gfOutput out 
)

Definition at line 1562 of file gfBlatLib.c.

References ffCdna, ffLoose, newHash(), and dnaSeq::size.

Referenced by transTripleSearch().

01567 {
01568 enum ffStringency stringency = (qIsRna ? ffCdna : ffLoose);
01569 int maxSize = 1500;
01570 int preferredSize = 1200;       /* PreferredSize - overlapSize might need to be multiple of 3. */
01571 int overlapSize = 270;
01572 struct dnaSeq subQuery = *query;
01573 int subOffset, subSize, nextOffset;
01574 DNA saveEnd, *endPos;
01575 struct ssBundle *oneBunList = NULL, *bigBunList = NULL, *bun;
01576 struct hash *bunHash = newHash(8);
01577 
01578 for (subOffset = 0; subOffset<query->size; subOffset = nextOffset)
01579     {
01580     /* Figure out size of this piece.  If query is
01581      * maxSize or less do it all.   Otherwise just
01582      * do prefered size, and set it up to overlap
01583      * with surrounding pieces by overlapSize.  */
01584     if (subOffset == 0 && query->size <= maxSize)
01585         nextOffset = subSize = query->size;
01586     else
01587         {
01588         subSize = preferredSize;
01589         if (subSize + subOffset >= query->size)
01590             {
01591             subSize = query->size - subOffset;
01592             nextOffset = query->size;
01593             }
01594         else
01595             {
01596             nextOffset = subOffset + preferredSize - overlapSize;
01597             }
01598         }
01599     subQuery.dna = query->dna + subOffset;
01600     subQuery.size = subSize;
01601     endPos = &subQuery.dna[subSize];
01602     saveEnd = *endPos;
01603     *endPos = 0;
01604     oneBunList = gfTransTransFindBundles(gfs, &subQuery, t3Hash, qIsRc, minScore, qIsRna);
01605     addToBigBundleList(&oneBunList, bunHash, &bigBunList, query);
01606     *endPos = saveEnd;
01607     }
01608 for (bun = bigBunList; bun != NULL; bun = bun->next)
01609     {
01610     ssStitch(bun, ffCdna, minScore, ssAliCount);
01611     saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 
01612         bun, NULL, qIsRc, tIsRc, stringency, minScore, out);
01613     }
01614 hashFree(&bunHash);
01615 ssBundleFreeList(&bigBunList);
01616 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct gfRange* gfQuerySeq ( int  conn,
struct dnaSeq seq 
) [static, read]

Definition at line 127 of file gfBlatLib.c.

References chopLine, errAbort(), gfRangeLoad(), gfServerWarn(), netRecieveString(), rangeList, sameString, slAddHead, slReverse(), startSeqQuery(), and startsWith().

Referenced by gfAlignStrand().

00129 {
00130 struct gfRange *rangeList = NULL, *range;
00131 char buf[256], *row[6];
00132 int rowSize;
00133 
00134 startSeqQuery(conn, seq, "query");
00135 
00136 /* Read results line by line and save in list, and return. */
00137 for (;;)
00138     {
00139     netRecieveString(conn, buf);
00140     if (sameString(buf, "end"))
00141         {
00142         break;
00143         }
00144     else if (startsWith("Error:", buf))
00145         {
00146         gfServerWarn(seq, buf);
00147         break;
00148         }
00149     else
00150         {
00151         rowSize = chopLine(buf, row);
00152         if (rowSize < 6)
00153             errAbort("Expecting 6 words from server got %d", rowSize);
00154         range = gfRangeLoad(row);
00155         slAddHead(&rangeList, range);
00156         }
00157     }
00158 slReverse(&rangeList);
00159 return rangeList;
00160 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void gfQuerySeqTrans ( int  conn,
aaSeq seq,
struct gfClump clumps[2][3],
struct lm lm,
struct gfSeqSource **  retSsList,
int *  retTileSize 
) [static]

Definition at line 210 of file gfBlatLib.c.

References AllocVar, chopLine, cloneString(), errAbort(), findTileSize(), getHitsFromServer(), gfServerWarn(), gfClump::hitCount, gfClump::hitList, lm, netRecieveString(), gfClump::qEnd, gfClump::qStart, sameString, gfSeqSource::seq, slAddHead, slCount(), slReverse(), sqlUnsigned(), ss, startSeqQuery(), startsWith(), gfClump::target, gfClump::tEnd, tileSize, and gfClump::tStart.

Referenced by gfAlignTrans().

00213 {
00214 int frame, isRc, rowSize;
00215 struct gfClump *clump;
00216 int tileSize = 0;
00217 char *line;
00218 char buf[256], *row[12];
00219 struct gfSeqSource *ssList = NULL, *ss;
00220 
00221 for (isRc = 0; isRc <= 1; ++isRc)
00222     for (frame = 0; frame<3; ++frame)
00223         clumps[isRc][frame] = NULL;
00224 
00225 /* Send sequence to server. */
00226 startSeqQuery(conn, seq, "protQuery");
00227 line = netRecieveString(conn, buf);
00228 if (!startsWith("Error:", line))
00229     {
00230     tileSize = findTileSize(line);
00231 
00232     /* Read results line by line and save in memory. */
00233     for (;;)
00234         {
00235         /* Read and parse first line that describes clump overall. */
00236         netRecieveString(conn, buf);
00237         if (sameString(buf, "end"))
00238             {
00239             break;
00240             }
00241         else if (startsWith("Error:", buf))
00242             {
00243             gfServerWarn(seq, buf);
00244             break;
00245             }
00246         rowSize = chopLine(buf, row);
00247         if (rowSize < 8)
00248             errAbort("Expecting 8 words from server got %d", rowSize);
00249         AllocVar(clump);
00250         clump->qStart = sqlUnsigned(row[0]);
00251         clump->qEnd = sqlUnsigned(row[1]);
00252         AllocVar(ss);
00253         ss->fileName = cloneString(row[2]);
00254         slAddHead(&ssList, ss);
00255         clump->target = ss;
00256         clump->tStart = sqlUnsigned(row[3]);
00257         clump->tEnd = sqlUnsigned(row[4]);
00258         clump->hitCount = sqlUnsigned(row[5]);
00259         isRc = ((row[6][0] == '-') ? 1 : 0);
00260         frame = sqlUnsigned(row[7]);
00261         slAddHead(&clumps[isRc][frame], clump);
00262 
00263         /* Read and parse next (long) line that describes hits. */
00264         clump->hitList = getHitsFromServer(conn, lm);
00265         assert(slCount(clump->hitList) == clump->hitCount);
00266         }
00267     for (isRc = 0; isRc <= 1; ++isRc)
00268         for (frame = 0; frame<3; ++frame)
00269             slReverse(&clumps[isRc][frame]);
00270     }
00271 else
00272     {
00273     gfServerWarn(seq, line);
00274     }
00275 *retSsList = ssList;
00276 *retTileSize = tileSize;
00277 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void gfQuerySeqTransTrans ( int  conn,
struct dnaSeq seq,
struct gfClump clumps[2][3][3],
struct lm lm,
struct gfSeqSource **  retSsList,
int *  retTileSize 
) [static]

Definition at line 279 of file gfBlatLib.c.

References AllocVar, chopLine, cloneString(), errAbort(), findTileSize(), getHitsFromServer(), gfServerWarn(), gfClump::hitCount, gfClump::hitList, lm, netRecieveString(), gfClump::qEnd, gfClump::qStart, sameString, gfSeqSource::seq, slAddHead, slCount(), slReverse(), sqlUnsigned(), ss, startSeqQuery(), startsWith(), gfClump::target, gfClump::tEnd, tileSize, and gfClump::tStart.

Referenced by gfAlignTransTrans().

00284 {
00285 int qFrame, tFrame, isRc, rowSize;
00286 struct gfClump *clump;
00287 int tileSize = 0;
00288 char *line;
00289 char buf[256], *row[12];
00290 struct gfSeqSource *ssList = NULL, *ss;
00291 
00292 for (isRc = 0; isRc <= 1; ++isRc)
00293     for (qFrame = 0; qFrame<3; ++qFrame)
00294         for (tFrame = 0; tFrame<3; ++tFrame)
00295             clumps[isRc][qFrame][tFrame] = NULL;
00296 
00297 /* Send sequence to server. */
00298 startSeqQuery(conn, seq, "transQuery");
00299 line = netRecieveString(conn, buf);
00300 if (!startsWith("Error:", line))
00301     {
00302     tileSize = findTileSize(line);
00303 
00304     /* Read results line by line and save in memory. */
00305     for (;;)
00306         {
00307         /* Read and parse first line that describes clump overall. */
00308         netRecieveString(conn, buf);
00309         if (sameString(buf, "end"))
00310             {
00311             break;
00312             }
00313         else if (startsWith("Error:", buf))
00314             {
00315             gfServerWarn(seq, buf);
00316             break;
00317             }
00318         rowSize = chopLine(buf, row);
00319         if (rowSize < 9)
00320             errAbort("Expecting 9 words from server got %d", rowSize);
00321         AllocVar(clump);
00322         clump->qStart = sqlUnsigned(row[0]);
00323         clump->qEnd = sqlUnsigned(row[1]);
00324         AllocVar(ss);
00325         ss->fileName = cloneString(row[2]);
00326         slAddHead(&ssList, ss);
00327         clump->target = ss;
00328         clump->tStart = sqlUnsigned(row[3]);
00329         clump->tEnd = sqlUnsigned(row[4]);
00330         clump->hitCount = sqlUnsigned(row[5]);
00331         isRc = ((row[6][0] == '-') ? 1 : 0);
00332         qFrame = sqlUnsigned(row[7]);
00333         tFrame = sqlUnsigned(row[8]);
00334         slAddHead(&clumps[isRc][qFrame][tFrame], clump);
00335 
00336         /* Read and parse next (long) line that describes hits. */
00337         clump->hitList = getHitsFromServer(conn, lm);
00338         assert(slCount(clump->hitList) == clump->hitCount);
00339         }
00340     for (isRc = 0; isRc <= 1; ++isRc)
00341         for (qFrame = 0; qFrame<3; ++qFrame)
00342             for (tFrame = 0; tFrame<3; ++tFrame)
00343                 slReverse(&clumps[isRc][qFrame][tFrame]);
00344     }
00345 else
00346     {
00347     gfServerWarn(seq, buf);
00348     }
00349 *retSsList = ssList;
00350 *retTileSize = tileSize;
00351 }

Here is the call graph for this function:

Here is the caller graph for this function:

int gfRangeCmpTarget ( const void *  va,
const void *  vb 
)

Definition at line 83 of file gfBlatLib.c.

References gfRange::t3, gfRange::tName, and gfRange::tStart.

Referenced by fastMapClumpsToBundles(), ffSeedExtInMem(), gfAlignSomeClumps(), gfAlignStrand(), gfAlignTrans(), gfAlignTransTrans(), gfClumpsToBundles(), gfFindAlignAaTrans(), and gfTransTransFindBundles().

00085 {
00086 const struct gfRange *a = *((struct gfRange **)va);
00087 const struct gfRange *b = *((struct gfRange **)vb);
00088 int diff;
00089 
00090 diff = strcmp(a->tName, b->tName);
00091 if (diff == 0)
00092     {
00093     long lDiff = a->t3 - b->t3;
00094     if (lDiff < 0)
00095        diff = -1;
00096     else if (lDiff > 0)
00097        diff = 1;
00098     else
00099        diff = 0;
00100 #ifdef SOLARIS_WORKAROUND_COMPILER_BUG_BUT_FAILS_IN_64_BIT
00101     diff = (unsigned long)(a->t3) - (unsigned long)(b->t3);     /* Casts needed for Solaris.  Thanks Darren Platt! */
00102 #endif /* SOLARIS_WORKAROUND_COMPILER_BUG_BUT_FAILS_IN_64_BIT */
00103     }
00104 if (diff == 0)
00105     diff = a->tStart - b->tStart;
00106 return diff;
00107 }

Here is the caller graph for this function:

void gfRangeFree ( struct gfRange **  pEl  ) 

Definition at line 41 of file gfBlatLib.c.

References gfRange::components, freeMem(), freez(), gfRangeFreeList(), and gfRange::tName.

Referenced by gfRangeFreeList().

00044 {
00045 struct gfRange *el;
00046 
00047 if ((el = *pEl) == NULL) return;
00048 freeMem(el->tName);
00049 if (el->components != NULL)
00050     gfRangeFreeList(&el->components);
00051 freez(pEl);
00052 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfRangeFreeList ( struct gfRange **  pList  ) 

Definition at line 54 of file gfBlatLib.c.

References gfRangeFree(), and gfRange::next.

Referenced by fastMapClumpsToBundles(), ffSeedExtInMem(), gfAlignSomeClumps(), gfAlignStrand(), gfAlignTrans(), gfAlignTransTrans(), gfFindAlignAaTrans(), gfPcrOneViaNet(), gfRangeFree(), and gfTransTransFindBundles().

00056 {
00057 struct gfRange *el, *next;
00058 
00059 for (el = *pList; el != NULL; el = next)
00060     {
00061     next = el->next;
00062     gfRangeFree(&el);
00063     }
00064 *pList = NULL;
00065 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct gfRange* gfRangeLoad ( char **  row  )  [static, read]

Definition at line 67 of file gfBlatLib.c.

References AllocVar, cloneString(), gfRange::hitCount, gfRange::qEnd, gfRange::qStart, gfRange::tEnd, gfRange::tName, and gfRange::tStart.

Referenced by gfQuerySeq().

00070 {
00071 struct gfRange *ret;
00072 
00073 AllocVar(ret);
00074 ret->qStart = atoi(row[0]);
00075 ret->qEnd = atoi(row[1]);
00076 ret->tName = cloneString(row[2]);
00077 ret->tStart = atoi(row[3]);
00078 ret->tEnd = atoi(row[4]);
00079 ret->hitCount = atoi(row[5]);
00080 return ret;
00081 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfRange* gfRangesBundle ( struct gfRange exonList,
int  maxIntron 
) [read]

Definition at line 355 of file gfBlatLib.c.

References AllocVar, cloneString(), gfRange::next, sameString, slAddHead, and slAddTail().

Referenced by fastMapClumpsToBundles(), ffSeedExtInMem(), gfAlignSomeClumps(), gfAlignStrand(), gfAlignTrans(), gfAlignTransTrans(), gfClumpsToBundles(), gfFindAlignAaTrans(), and gfTransTransFindBundles().

00360 {
00361 struct gfRange *geneList = NULL, *gene = NULL, *lastExon = NULL, *exon, *nextExon;
00362 
00363 for (exon = exonList; exon != NULL; exon = nextExon)
00364     {
00365     nextExon = exon->next;
00366     if (lastExon == NULL || !sameString(lastExon->tName, exon->tName) 
00367         || exon->t3  != lastExon->t3
00368         || exon->tStart - lastExon->tEnd > maxIntron)
00369         {
00370         AllocVar(gene);
00371         gene->tStart = exon->tStart;
00372         gene->tEnd = exon->tEnd;
00373         gene->tName = cloneString(exon->tName);
00374         gene->tSeq = exon->tSeq;
00375         gene->qStart = exon->qStart;
00376         gene->qEnd = exon->qEnd;
00377         gene->hitCount = exon->hitCount;
00378         gene->t3 = exon->t3;
00379         slAddHead(&gene->components, exon);
00380         slAddHead(&geneList, gene);
00381         }
00382     else
00383         {
00384         if (exon->qStart < gene->qStart) gene->qStart = exon->qStart;
00385         if (exon->qEnd > gene->qEnd) gene->qEnd = exon->qEnd;
00386         if (exon->tStart < gene->tStart) gene->tStart = exon->tStart;
00387         if (exon->tEnd > gene->tEnd) gene->tEnd = exon->tEnd;
00388         gene->hitCount += exon->hitCount;
00389         slAddTail(&gene->components, exon);
00390         }
00391     lastExon = exon;
00392     }
00393 slReverse(&geneList);
00394 return geneList;
00395 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct ssFfItem* gfRangesToFfItem ( struct gfRange rangeList,
aaSeq qSeq 
) [read]

Definition at line 824 of file gfBlatLib.c.

References AllocVar, dnaSeq::dna, ssFfItem::ff, ffMakeRightLinks(), gfRange::next, gfRange::qEnd, gfRange::qStart, rangeList, gfRange::tEnd, gfRange::tSeq, and gfRange::tStart.

Referenced by fastMapClumpsToBundles(), ffSeedExtInMem(), gfAlignSomeClumps(), gfAlignTrans(), gfAlignTransTrans(), gfFindAlignAaTrans(), and gfTransTransFindBundles().

00826 {
00827 AA *q = qSeq->dna;
00828 struct ffAli *ffList = NULL, *ff;
00829 struct gfRange *range;
00830 struct ssFfItem *ffi;
00831 
00832 for (range = rangeList; range != NULL; range = range->next)
00833     {
00834     aaSeq *tSeq = range->tSeq;
00835     AA *t = tSeq->dna;
00836     AllocVar(ff);
00837     ff->nStart = q + range->qStart;
00838     ff->nEnd = q + range->qEnd;
00839     ff->hStart = t + range->tStart;
00840     ff->hEnd = t + range->tEnd;
00841     ff->left = ffList;
00842     ffList = ff;
00843     }
00844 AllocVar(ffi);
00845 ffi->ff = ffMakeRightLinks(ffList);
00846 return ffi;
00847 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void gfServerWarn ( bioSeq seq,
char *  warning 
) [static]

Definition at line 121 of file gfBlatLib.c.

References dnaSeq::name, and warn().

Referenced by gfQuerySeq(), gfQuerySeqTrans(), and gfQuerySeqTransTrans().

00123 {
00124 warn("couldn't process %s: %s", seq->name, warning);
00125 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct ssBundle* gfTransTransFindBundles ( struct genoFind gfs[3],
struct dnaSeq qSeq,
struct hash t3Hash,
boolean  isRc,
int  minMatch,
boolean  isRna 
) [static, read]

Definition at line 1271 of file gfBlatLib.c.

References AllocVar, clumpToHspRange(), FALSE, ffCdna, ssBundle::ffList, ffLoose, ssBundle::genoSeq, gfClumpFreeList(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), gfTransTransFindClumps(), lm, lmCleanup(), lmInit(), range::next, gfClump::next, ssBundle::qSeq, rangeList, slAddHead, slCat(), slReverse(), slSort(), ssAliCount, ssStitch(), genoFind::tileSize, tileSize, trans3::trans, trans3Free(), trans3New(), TRUE, and untranslateRangeList().

01275 {
01276 struct trans3 *qTrans = trans3New(qSeq);
01277 int qFrame, tFrame;
01278 struct gfClump *clumps[3][3], *clump;
01279 struct gfRange *rangeList = NULL, *range;
01280 int tileSize = gfs[0]->tileSize;
01281 bioSeq *targetSeq;
01282 struct ssBundle *bun, *bunList = NULL;
01283 int hitCount;
01284 struct lm *lm = lmInit(0);
01285 enum ffStringency stringency = (isRna ? ffCdna : ffLoose);
01286 
01287 gfTransTransFindClumps(gfs, qTrans->trans, clumps, lm, &hitCount);
01288 for (qFrame = 0; qFrame<3; ++qFrame)
01289     {
01290     for (tFrame=0; tFrame<3; ++tFrame)
01291         {
01292         for (clump = clumps[qFrame][tFrame]; clump != NULL; clump = clump->next)
01293             {
01294             struct gfRange *rangeSet = NULL;
01295             clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, NULL, &rangeSet, TRUE, FALSE);
01296             untranslateRangeList(rangeSet, qFrame, tFrame, t3Hash, NULL, 0);
01297             rangeList = slCat(rangeSet, rangeList);
01298             }
01299         }
01300     }
01301 slSort(&rangeList, gfRangeCmpTarget);
01302 rangeList = gfRangesBundle(rangeList, 2000);
01303 for (range = rangeList; range != NULL; range = range->next)
01304     {
01305     targetSeq = range->tSeq;
01306     AllocVar(bun);
01307     bun->qSeq = qSeq;
01308     bun->genoSeq = targetSeq;
01309     bun->ffList = gfRangesToFfItem(range->components, qSeq);
01310     ssStitch(bun, stringency, minMatch, ssAliCount);
01311     slAddHead(&bunList, bun);
01312     }
01313 for (qFrame = 0; qFrame<3; ++qFrame)
01314     for (tFrame=0; tFrame<3; ++tFrame)
01315         gfClumpFreeList(&clumps[qFrame][tFrame]);
01316 gfRangeFreeList(&rangeList);
01317 trans3Free(&qTrans);
01318 lmCleanup(&lm);
01319 slReverse(&bunList);
01320 return bunList;
01321 }

Here is the call graph for this function:

static boolean jiggleSmallExons ( struct ffAli ali,
struct dnaSeq nSeq,
struct dnaSeq hSeq 
) [static]

Definition at line 1349 of file gfBlatLib.c.

References FALSE, ffAliCount(), ffIntronOrientation(), ffSlideIntrons(), ffAli::hEnd, ffAli::hStart, ffAli::left, memMatch(), ffAli::nEnd, ffAli::nStart, ffAli::right, and TRUE.

Referenced by refineSmallExons().

01352 {
01353 struct ffAli *left, *mid, *right;
01354 int orient;
01355 boolean creeped = FALSE;
01356 
01357 if (ffAliCount(ali) < 3)
01358     return FALSE;
01359 orient = ffIntronOrientation(ali);
01360 left = ali;
01361 mid = left->right;
01362 right = mid->right;
01363 while (right != NULL)
01364     {
01365     int midSizeN = mid->nEnd - mid->nStart;
01366     if (midSizeN < 10 && mid->hStart - left->hEnd > 1 && right->hStart - mid->hEnd > 1)
01367         {
01368         DNA *spLeft, *spRight;  /* Splice sites on either side of exon. */
01369         DNA exonX[10+2+2];    /* Storage for exon with splice sites. */
01370         DNA *match;
01371         static int creeps[4][2] = { {2, 2}, {2, 1}, {1, 2}, {1, 1},};
01372         int creepIx, creepL, creepR;
01373         DNA *hs = mid->hStart, *he = mid->hEnd;
01374         DNA *hMin = left->hEnd,  *hMax = right->hStart;
01375         if (orient >= 0)
01376             {
01377             spLeft = "ag";
01378             spRight = "gt";
01379             }
01380         else
01381             {
01382             spLeft = "ac";
01383             spRight = "ct";
01384             }
01385         for (creepIx=0; creepIx<4; ++creepIx)
01386             {
01387             creepL = creeps[creepIx][0];
01388             creepR = creeps[creepIx][1];
01389             /* Check to see if we already match consensus, and if so just bail. */
01390             if (hs[-1] == spLeft[1] && he[0] == spRight[0])
01391                 {
01392                 if ((creepL == 1 || hs[-2] == spLeft[0]) 
01393                         && (creepR == 1 || he[1] == spRight[1]))
01394                     {
01395                     break;
01396                     }
01397                 }
01398             memcpy(exonX, spLeft + 2 - creepL, creepL);
01399             memcpy(exonX + creepL, mid->nStart, midSizeN);
01400             memcpy(exonX + creepL + midSizeN, spRight, creepR);
01401             match = memMatch(exonX, midSizeN + creepR + creepL, hMin, hMax - hMin);
01402             if (match != NULL)
01403                 {
01404                 mid->hStart = match + creepL;
01405                 mid->hEnd = mid->hStart + (he - hs);
01406                 creeped = TRUE;
01407                 break;
01408                 }
01409             }
01410         }
01411     left = mid;
01412     mid = right;
01413     right = right->right;
01414     }
01415 if (creeped)
01416     ffSlideIntrons(ali);
01417 return creeped;
01418 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void loadHashT3Ranges ( struct gfRange rangeList,
char *  tSeqDir,
struct hash tFileCache,
int  qSeqSize,
boolean  isRc,
struct hash **  retT3Hash,
struct dnaSeq **  retSeqList,
struct slRef **  retT3RefList 
) [static]

Definition at line 1000 of file gfBlatLib.c.

References cloneString(), trans3::end, freez(), gfiExpandAndLoadCached(), hashAdd(), hashFindVal(), trans3::isRc, dnaSeq::name, newHash(), gfRange::next, trans3::nibSize, rangeList, refAdd(), slAddHead, slAddTail(), trans3::start, gfRange::t3, gfRange::tEnd, gfRange::tName, trans3New(), TRUE, gfRange::tStart, and gfRange::tTotalSize.

Referenced by gfAlignTrans(), and gfAlignTransTrans().

01006 {
01007 struct hash *t3Hash = newHash(10);
01008 struct dnaSeq *targetSeq, *tSeqList = NULL;
01009 struct slRef *t3RefList = NULL;
01010 struct gfRange *range;
01011 
01012 for (range = rangeList; range != NULL; range = range->next)
01013     {
01014     struct trans3 *t3, *oldT3;
01015 
01016     targetSeq = gfiExpandAndLoadCached(range, tFileCache,
01017         tSeqDir, qSeqSize*3, &range->tTotalSize, TRUE, isRc, usualExpansion);
01018     slAddHead(&tSeqList, targetSeq);
01019     freez(&targetSeq->name);
01020     targetSeq->name = cloneString(range->tName);
01021     t3 = trans3New(targetSeq);
01022     refAdd(&t3RefList, t3);
01023     t3->start = range->tStart;
01024     t3->end = range->tEnd;
01025     t3->nibSize = range->tTotalSize;
01026     t3->isRc = isRc;
01027     if ((oldT3 = hashFindVal(t3Hash, range->tName)) != NULL)
01028         {
01029         slAddTail(&oldT3->next, t3);
01030         }
01031     else
01032         {
01033         hashAdd(t3Hash, range->tName, t3);
01034         }
01035     }
01036 *retT3Hash = t3Hash;
01037 *retSeqList = tSeqList;
01038 *retT3RefList = t3RefList;
01039 }

Here is the call graph for this function:

Here is the caller graph for this function:

void rangeCoorTimes3 ( struct gfRange rangeList  ) 

Definition at line 986 of file gfBlatLib.c.

References gfRange::next, gfRange::qEnd, gfRange::qStart, rangeList, gfRange::tEnd, and gfRange::tStart.

Referenced by gfAlignTrans(), and gfAlignTransTrans().

00989 {
00990 struct gfRange *range;
00991 for (range = rangeList; range != NULL; range = range->next)
00992     {
00993     range->qStart *= 3;
00994     range->qEnd *= 3;
00995     range->tStart = range->tStart*3;
00996     range->tEnd = range->tEnd*3;
00997     }
00998 }

Here is the caller graph for this function:

int rangeScore ( struct gfRange range,
struct dnaSeq qSeq 
)

Definition at line 920 of file gfBlatLib.c.

References gfRange::components, dnaSeq::dna, dnaScoreMatch(), gfRange::next, gfRange::qStart, gfRange::tEnd, gfRange::tSeq, and gfRange::tStart.

00922 {
00923 struct gfRange *comp;
00924 struct dnaSeq *tSeq;
00925 int score = 0;
00926 for (comp = range->components; comp != NULL; comp = comp->next)
00927     {
00928     tSeq = comp->tSeq;
00929     score += dnaScoreMatch(tSeq->dna + range->tStart, qSeq->dna + range->qStart, 
00930         range->tEnd - range->tStart);
00931     if (comp->next != NULL)
00932         score -= 4;
00933     }
00934 return score;
00935 }

Here is the call graph for this function:

static struct ffAli* refineSmallExons ( struct ffAli ff,
struct dnaSeq nSeq,
struct dnaSeq hSeq 
) [static, read]

Definition at line 1420 of file gfBlatLib.c.

References ffRemoveEmptyAlis(), jiggleSmallExons(), and TRUE.

Referenced by refineSmallExonsInBundle().

01425 {
01426 if (jiggleSmallExons(ff, nSeq, hSeq))
01427     ff = ffRemoveEmptyAlis(ff, TRUE);
01428 return ff;
01429 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void refineSmallExonsInBundle ( struct ssBundle bun  )  [static]

Definition at line 1431 of file gfBlatLib.c.

References ssFfItem::ff, ssBundle::ffList, ssBundle::genoSeq, ssFfItem::next, ssBundle::qSeq, and refineSmallExons().

01435 {
01436 struct ssFfItem *fi;    /* Item list - memory owned by bundle. */
01437 
01438 for (fi = bun->ffList; fi != NULL; fi = fi->next)
01439     {
01440     fi->ff = refineSmallExons(fi->ff, bun->qSeq, bun->genoSeq);
01441     }
01442 }

Here is the call graph for this function:

static void saveAlignments ( char *  chromName,
int  chromSize,
int  chromOffset,
struct ssBundle bun,
struct hash t3Hash,
boolean  qIsRc,
boolean  tIsRc,
enum ffStringency  stringency,
int  minMatch,
struct gfOutput out 
) [static]

Definition at line 477 of file gfBlatLib.c.

References ssFfItem::ff, ssBundle::ffList, ssBundle::genoSeq, hashMustFindVal(), ssBundle::isProt, dnaSeq::name, ssFfItem::next, gfOutput::out, ssBundle::qSeq, scoreAli(), and dnaSeq::size.

Referenced by gfAlignSomeClumps(), gfAlignStrand(), gfAlignTrans(), gfAlignTransTrans(), and gfFindAlignAaTrans().

00482 {
00483 struct dnaSeq *tSeq = bun->genoSeq, *qSeq = bun->qSeq;
00484 struct ssFfItem *ffi;
00485 if (minMatch > qSeq->size/2) minMatch = qSeq->size/2;
00486 if (minMatch < 1) minMatch = 1;
00487 for (ffi = bun->ffList; ffi != NULL; ffi = ffi->next)
00488     {
00489     struct ffAli *ff = ffi->ff;
00490     struct trans3 *t3List = NULL;
00491     int score;
00492     if (t3Hash != NULL)
00493         t3List = hashMustFindVal(t3Hash, tSeq->name);
00494     score = scoreAli(ff, bun->isProt, stringency, tSeq, t3List);
00495     if (score >= minMatch)
00496         {
00497         out->out(chromName, chromSize, chromOffset, ff, tSeq, t3Hash, qSeq, 
00498             qIsRc, tIsRc, stringency, minMatch, out);
00499         }
00500     }
00501 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int scoreAli ( struct ffAli ali,
boolean  isProt,
enum ffStringency  stringency,
struct dnaSeq tSeq,
struct trans3 t3List 
) [static]

Definition at line 449 of file gfBlatLib.c.

References aaScoreMatch(), dnaScoreMatch(), FALSE, ffCalcGapPenalty(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, ffAli::right, trans3GenoPos(), and TRUE.

Referenced by saveAlignments().

00453 {
00454 int (*scoreFunc)(char *a, char *b, int size);
00455 struct ffAli *ff, *nextFf;
00456 int score = 0;
00457 if (isProt) 
00458     scoreFunc = aaScoreMatch;
00459 else
00460     scoreFunc = dnaScoreMatch;
00461 for (ff = ali; ff != NULL; ff = nextFf)
00462     {
00463     nextFf = ff->right;
00464     score += scoreFunc(ff->nStart, ff->hStart, ff->nEnd-ff->nStart);
00465     if (nextFf != NULL)
00466         {
00467         int nhStart = trans3GenoPos(nextFf->hStart, tSeq, t3List, FALSE);
00468         int ohEnd = trans3GenoPos(ff->hEnd, tSeq, t3List, TRUE);
00469         int hGap = nhStart - ohEnd;
00470         int nGap = nextFf->nStart - ff->nEnd;
00471         score -= ffCalcGapPenalty(hGap, nGap, stringency);
00472         }
00473     }
00474 return score;
00475 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct gfRange* seqClumpToRangeList ( struct gfClump clumpList,
int  frame 
) [static, read]

Definition at line 601 of file gfBlatLib.c.

References AllocVar, cloneString(), clumpTargetName(), name, gfClump::next, gfClump::qEnd, gfClump::qStart, rangeList, gfSeqSource::seq, slAddHead, slReverse(), gfSeqSource::start, gfClump::target, gfClump::tEnd, and gfClump::tStart.

Referenced by gfAlignTrans(), gfAlignTransTrans(), and gfClumpsToBundles().

00603 {
00604 struct gfRange *rangeList = NULL, *range;
00605 struct gfClump *clump;
00606 char *name;
00607 int tOff;
00608 
00609 for (clump = clumpList; clump != NULL; clump = clump->next)
00610     {
00611     tOff = clump->target->start;
00612     AllocVar(range);
00613     range->qStart = clump->qStart;
00614     range->qEnd = clump->qEnd;
00615     name = clumpTargetName(clump);
00616     range->tName = cloneString(name);
00617     range->tStart = clump->tStart - tOff;
00618     range->tEnd = clump->tEnd - tOff;
00619     range->tSeq = clump->target->seq;
00620     range->frame = frame;
00621     slAddHead(&rangeList, range);
00622     }
00623 slReverse(&rangeList);
00624 return rangeList;
00625 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void startSeqQuery ( int  conn,
bioSeq seq,
char *  type 
) [static]

Definition at line 109 of file gfBlatLib.c.

References dnaSeq::dna, errAbort(), gfSignature(), and dnaSeq::size.

Referenced by gfQuerySeq(), gfQuerySeqTrans(), and gfQuerySeqTransTrans().

00111 {
00112 char buf[256];
00113 sprintf(buf, "%s%s %d", gfSignature(), type, seq->size);
00114 write(conn, buf, strlen(buf));
00115 read(conn, buf, 1);
00116 if (buf[0] != 'Y')
00117     errAbort("Expecting 'Y' from server, got %c", buf[0]);
00118 write(conn, seq->dna, seq->size);
00119 }

Here is the call graph for this function:

Here is the caller graph for this function:

void untranslateRangeList ( struct gfRange rangeList,
int  qFrame,
int  tFrame,
struct hash t3Hash,
struct trans3 t3,
int  tOffset 
)

Definition at line 1145 of file gfBlatLib.c.

References dnaSeq::name, gfRange::next, gfRange::qEnd, gfRange::qStart, rangeList, trans3::seq, gfRange::t3, gfRange::tEnd, trans3Find(), gfRange::tSeq, and gfRange::tStart.

Referenced by gfAlignTransTrans(), and gfTransTransFindBundles().

01148 {
01149 struct gfRange *range;
01150 for (range = rangeList; range != NULL; range = range->next)
01151     {
01152     range->qStart = 3*range->qStart + qFrame;
01153     range->qEnd = 3*range->qEnd + qFrame;
01154     range->tStart = 3*range->tStart + tFrame;
01155     range->tEnd = 3*range->tEnd + tFrame;
01156     if (t3Hash)
01157         t3 = trans3Find(t3Hash, range->tSeq->name, range->tStart + tOffset, range->tEnd + tOffset);
01158     range->tSeq = t3->seq;
01159     range->t3 = t3;
01160     }
01161 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

char const rcsid[] = "$Id: gfBlatLib.c,v 1.25 2007/04/20 22:44:11 kent Exp $" [static]

Definition at line 21 of file gfBlatLib.c.

int ssAliCount = 16 [static]

Definition at line 23 of file gfBlatLib.c.

Referenced by gfAlignSomeClumps(), gfAlignStrand(), gfAlignTrans(), gfAlignTransTrans(), gfClumpsToBundles(), gfFindAlignAaTrans(), and gfTransTransFindBundles().

int usualExpansion = 500 [static]

Definition at line 397 of file gfBlatLib.c.


Generated on Tue Dec 25 19:27:25 2007 for blat by  doxygen 1.5.2