inc/genoFind.h File Reference

#include "dnaseq.h"
#include "fuzzyFind.h"
#include "hash.h"
#include "localmem.h"
#include "axt.h"

Include dependency graph for genoFind.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  gfSeqSource
struct  gfHit
struct  gfClump
struct  genoFind
struct  gfOutput

Defines

#define gfVersion   "34"

Enumerations

enum  gfConstants {
  gfMinMatch = 2, gfMaxGap = 2, gfTileSize = 11, gfMaxTileUse = 1024,
  gfPepMaxTileUse = 30000
}

Functions

void gfClumpFree (struct gfClump **pClump)
void gfClumpFreeList (struct gfClump **pList)
void genoFindFree (struct genoFind **pGenoFind)
struct gfSeqSource * gfFindNamedSource (struct genoFind *gf, char *name)
struct gfOutput * gfOutputAny (char *format, int goodPpt, boolean qIsProt, boolean tIsProt, boolean noHead, char *databaseName, int databaseSeqCount, double databaseLetters, double minIdentity, FILE *f)
struct gfOutput * gfOutputPsl (int goodPpt, boolean qIsProt, boolean tIsProt, FILE *f, boolean saveSeq, boolean noHead)
struct gfOutput * gfOutputAxt (int goodPpt, boolean qIsProt, boolean tIsProt, FILE *f)
struct gfOutput * gfOutputAxtMem (int goodPpt, boolean qIsProt, boolean tIsProt)
struct gfOutput * gfOutputBlast (int goodPpt, boolean qIsProt, boolean tIsProt, char *databaseName, int databaseSeqCount, double databaseLetters, char *blastType, double minIdentity, FILE *f)
void gfOutputQuery (struct gfOutput *out, FILE *f)
void gfOutputHead (struct gfOutput *out, FILE *f)
void gfOutputFree (struct gfOutput **pOut)
void gfCheckTileSize (int tileSize, boolean isPep)
struct genoFind * gfIndexSeq (bioSeq *seqList, int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean isPep, boolean allowOneMismatch, boolean maskUpper, int stepSize)
struct genoFind * gfIndexNibsAndTwoBits (int fileCount, char *fileNames[], int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean allowOneMismatch, int stepSize)
void gfIndexTransNibsAndTwoBits (struct genoFind *transGf[2][3], int fileCount, char *fileNames[], int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean allowOneMismatch, boolean mask, int stepSize)
struct gfClump * gfFindClumps (struct genoFind *gf, struct dnaSeq *seq, struct lm *lm, int *retHitCount)
struct gfClump * gfFindClumpsWithQmask (struct genoFind *gf, bioSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount)
struct gfHit * gfFindHitsInRegion (struct genoFind *gf, bioSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, struct gfSeqSource *target, int tMin, int tMax)
void gfTransFindClumps (struct genoFind *gfs[3], aaSeq *seq, struct gfClump *clumps[3], struct lm *lm, int *retHitCount)
void gfTransTransFindClumps (struct genoFind *gfs[3], aaSeq *seqs[3], struct gfClump *clumps[3][3], struct lm *lm, int *retHitCount)
void gfClumpDump (struct genoFind *gf, struct gfClump *clump, FILE *f)
void gfAlignAaClumps (struct genoFind *gf, struct gfClump *clumpList, aaSeq *seq, boolean isRc, int minMatch, struct gfOutput *out)
void gfFindAlignAaTrans (struct genoFind *gfs[3], aaSeq *qSeq, struct hash *t3Hash, boolean tIsRc, int minMatch, struct gfOutput *out)
char * gfSignature ()
void gfCatchPipes ()
int gfReadMulti (int sd, void *vBuf, size_t size)
struct hash * gfFileCacheNew ()
void gfFileCacheFree (struct hash **pCache)
void gfAlignStrand (int *pConn, char *nibDir, struct dnaSeq *seq, boolean isRc, int minMatch, struct hash *tFileCache, struct gfOutput *out)
void gfAlignTrans (int *pConn, char *nibDir, aaSeq *seq, int minMatch, struct hash *tFileHash, struct gfOutput *out)
void gfAlignTransTrans (int *pConn, char *nibDir, struct dnaSeq *seq, boolean qIsRc, int minMatch, struct hash *tFileCache, struct gfOutput *out, boolean isRna)
int gfConnect (char *hostName, char *portName)
void gfMakeOoc (char *outName, char *files[], int fileCount, int tileSize, bits32 maxPat, enum gfType tType)
void gfLongDnaInMem (struct dnaSeq *query, struct genoFind *gf, boolean isRc, int minScore, Bits *qMaskBits, struct gfOutput *out, boolean fastMap, boolean band)
void gfLongTransTransInMem (struct dnaSeq *query, struct genoFind *gfs[3], struct hash *t3Hash, boolean qIsRc, boolean tIsRc, boolean qIsRna, int minScore, struct gfOutput *out)
struct gfClump * gfPcrClumps (struct genoFind *gf, char *fPrimer, int fPrimerSize, char *rPrimer, int rPrimerSize, int minDistance, int maxDistance)


Define Documentation

#define gfVersion   "34"

Definition at line 377 of file genoFind.h.

Referenced by startServer(), tabBlastOut(), and usage().


Enumeration Type Documentation

enum gfConstants

Enumerator:
gfMinMatch 
gfMaxGap 
gfTileSize 
gfMaxTileUse 
gfPepMaxTileUse 

Definition at line 36 of file genoFind.h.

00036                  {
00037     gfMinMatch = 2,
00038     gfMaxGap = 2,
00039     gfTileSize = 11,
00040     gfMaxTileUse = 1024,
00041     gfPepMaxTileUse = 30000,
00042 };


Function Documentation

void genoFindFree ( struct genoFind **  pGenoFind  ) 

Definition at line 67 of file genoFind.c.

References genoFind::allocated, bitFree(), freeMem(), freez(), genoFind::lists, genoFind::listSizes, gfSeqSource::maskedBits, genoFind::sourceCount, and genoFind::sources.

Referenced by bigBlat(), genoFindDirect(), genoPcrDirect(), and gfMakeOoc().

00069 {
00070 struct genoFind *gf = *pGenoFind;
00071 int i;
00072 struct gfSeqSource *sources;
00073 if (gf != NULL)
00074     {
00075     freeMem(gf->lists);
00076     freeMem(gf->listSizes);
00077     freeMem(gf->allocated);
00078     if ((sources = gf->sources) != NULL)
00079         {
00080         for (i=0; i<gf->sourceCount; ++i)
00081             bitFree(&sources[i].maskedBits);
00082         freeMem(sources);
00083         }
00084     freez(pGenoFind);
00085     }
00086 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfAlignAaClumps ( struct genoFind *  gf,
struct gfClump *  clumpList,
aaSeq *  seq,
boolean  isRc,
int  minMatch,
struct gfOutput *  out 
)

Definition at line 913 of file gfBlatLib.c.

References ffTight, gfAlignSomeClumps(), and TRUE.

Referenced by searchOneProt().

00916 {
00917 gfAlignSomeClumps(gf, clumpList, seq, isRc, minMatch, out, TRUE, ffTight);
00918 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfAlignStrand ( int *  pConn,
char *  nibDir,
struct dnaSeq *  seq,
boolean  isRc,
int  minMatch,
struct hash *  tFileCache,
struct gfOutput *  out 
)

Definition at line 552 of file gfBlatLib.c.

References alignComponents(), AllocVar, FALSE, ffCdna, ffIntronMax, freeDnaSeq(), getTargetName(), gfiExpandAndLoadCached(), gfQuerySeq(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfOutput::includeTargetFile, range::next, PATH_LEN, rangeList, saveAlignments(), dnaSeq::size, slSort(), ssAliCount, ssBundleFree(), and ssStitch().

Referenced by doBlat(), and gfClient().

00557 {
00558 struct ssBundle *bun;
00559 struct gfRange *rangeList = NULL, *range;
00560 struct dnaSeq *targetSeq;
00561 char targetName[PATH_LEN];
00562 
00563 rangeList = gfQuerySeq(*pConn, seq);
00564 close(*pConn);
00565 *pConn = -1;
00566 slSort(&rangeList, gfRangeCmpTarget);
00567 /* Dump range list to file for debugging. */
00568 rangeList = gfRangesBundle(rangeList, ffIntronMax);
00569 for (range = rangeList; range != NULL; range = range->next)
00570     {
00571     getTargetName(range->tName, out->includeTargetFile, targetName);
00572     targetSeq = gfiExpandAndLoadCached(range, tFileCache, tSeqDir, 
00573         seq->size, &range->tTotalSize, FALSE, FALSE, usualExpansion);
00574     AllocVar(bun);
00575     bun->qSeq = seq;
00576     bun->genoSeq = targetSeq;
00577     alignComponents(range, bun, ffCdna);
00578     ssStitch(bun, ffCdna, minMatch, ssAliCount);
00579     saveAlignments(targetName, range->tTotalSize, range->tStart, 
00580         bun, NULL, isRc, FALSE, ffCdna, minMatch, out);
00581     ssBundleFree(&bun);
00582     freeDnaSeq(&targetSeq);
00583     }
00584 gfRangeFreeList(&rangeList);
00585 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfAlignTrans ( int *  pConn,
char *  nibDir,
aaSeq *  seq,
int  minMatch,
struct hash *  tFileHash,
struct gfOutput *  out 
)

Definition at line 1042 of file gfBlatLib.c.

References AllocVar, clumpTargetName(), clumpToHspRange(), trans3::end, FALSE, ffCdna, ffIntronMax, ssBundle::ffList, freeDnaSeqList(), freeHash(), freeMem(), ssBundle::genoSeq, getTargetName(), gfClumpFreeList(), gfQuerySeqTrans(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), hashMustFindVal(), gfOutput::includeTargetFile, ssBundle::isProt, lm, lmCleanup(), lmInit(), loadHashT3Ranges(), gfSeqSource::next, slRef::next, gfClump::next, trans3::nibSize, PATH_LEN, ssBundle::qSeq, rangeCoorTimes3(), rangeList, saveAlignments(), trans3::seq, seqClumpToRangeList(), dnaSeq::size, slCat(), slFreeList(), slReverse(), slSort(), ss, ssAliCount, ssBundleFree(), ssStitch(), trans3::start, ssBundle::t3List, gfClump::target, gfClump::tEnd, tileSize, trans3::trans, trans3Find(), trans3Free(), TRUE, and gfClump::tStart.

Referenced by doBlat(), and gfClient().

01047 {
01048 struct ssBundle *bun;
01049 struct gfClump *clumps[2][3], *clump;
01050 struct gfRange *rangeList = NULL, *range, *rl;
01051 struct dnaSeq *targetSeq, *tSeqList = NULL;
01052 char targetName[PATH_LEN];
01053 int tileSize;
01054 int frame, isRc = 0;
01055 struct hash *t3Hash = NULL;
01056 struct slRef *t3RefList = NULL, *ref;
01057 struct gfSeqSource *ssList = NULL, *ss;
01058 struct trans3 *t3;
01059 struct lm *lm = lmInit(0);
01060 
01061 /* Get clumps from server. */
01062 gfQuerySeqTrans(*pConn, seq, clumps, lm, &ssList, &tileSize);
01063 close(*pConn);
01064 *pConn = -1;
01065 
01066 for (isRc = 0; isRc <= 1;  ++isRc)
01067     {
01068     /* Figure out which parts of sequence we need to load. */
01069     for (frame = 0; frame < 3; ++frame)
01070         {
01071         rl = seqClumpToRangeList(clumps[isRc][frame], frame);
01072         rangeList = slCat(rangeList, rl);
01073         }
01074     /* Convert from amino acid to nucleotide coordinates. */
01075     rangeCoorTimes3(rangeList);
01076     slSort(&rangeList, gfRangeCmpTarget);
01077     rangeList = gfRangesBundle(rangeList, ffIntronMax);
01078     loadHashT3Ranges(rangeList, tSeqDir, tFileCache, seq->size, 
01079         isRc, &t3Hash, &tSeqList, &t3RefList);
01080 
01081     /* The old range list was not very precise - it was just to get
01082      * the DNA loaded.  */
01083     gfRangeFreeList(&rangeList);
01084 
01085 
01086     /* Patch up clump list and associated sequence source to refer
01087      * to bits of genome loaded into memory.  Create new range list
01088      * by extending hits in clumps. */
01089     for (frame = 0; frame < 3; ++frame)
01090         {
01091         for (clump = clumps[isRc][frame]; clump != NULL; clump = clump->next)
01092             {
01093             struct gfSeqSource *ss = clump->target;
01094             t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3);
01095             ss->seq = t3->trans[frame];
01096             ss->start = t3->start/3;
01097             ss->end = t3->end/3;
01098             clumpToHspRange(clump, seq, tileSize, frame, t3, &rangeList, TRUE, FALSE);
01099             }
01100         }
01101     slReverse(&rangeList);
01102     slSort(&rangeList, gfRangeCmpTarget);
01103     rangeList = gfRangesBundle(rangeList, ffIntronMax/3);
01104 
01105     /* Do detailed alignment of each of the clustered ranges. */
01106     for (range = rangeList; range != NULL; range = range->next)
01107         {
01108         targetSeq = range->tSeq;
01109         AllocVar(bun);
01110         bun->qSeq = seq;
01111         bun->genoSeq = targetSeq;
01112         bun->ffList = gfRangesToFfItem(range->components, seq);
01113         bun->isProt = TRUE;
01114         t3 = hashMustFindVal(t3Hash, range->tName);
01115         bun->t3List = t3;
01116         ssStitch(bun, ffCdna, minMatch, ssAliCount);
01117         getTargetName(range->tName, out->includeTargetFile, targetName);
01118         saveAlignments(targetName, t3->nibSize, 0, 
01119             bun, t3Hash, FALSE, isRc, ffCdna, minMatch, out);
01120         ssBundleFree(&bun);
01121         }
01122 
01123     /* Cleanup for this strand of database. */
01124     gfRangeFreeList(&rangeList);
01125     freeHash(&t3Hash);
01126     for (ref = t3RefList; ref != NULL; ref = ref->next)
01127         {
01128         struct trans3 *t3 = ref->val;
01129         trans3Free(&t3);
01130         }
01131     slFreeList(&t3RefList);
01132     freeDnaSeqList(&tSeqList);
01133     }
01134 
01135 /* Final cleanup. */
01136 for (isRc=0; isRc<=1; ++isRc)
01137     for (frame=0; frame<3; ++frame)
01138         gfClumpFreeList(&clumps[isRc][frame]);
01139 for (ss = ssList; ss != NULL; ss = ss->next)
01140     freeMem(ss->fileName);
01141 slFreeList(&ssList);
01142 lmCleanup(&lm);
01143 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfAlignTransTrans ( int *  pConn,
char *  nibDir,
struct dnaSeq *  seq,
boolean  qIsRc,
int  minMatch,
struct hash *  tFileCache,
struct gfOutput *  out,
boolean  isRna 
)

Definition at line 1163 of file gfBlatLib.c.

References AllocVar, clumpTargetName(), clumpToHspRange(), FALSE, ffCdna, ffIntronMax, ffLoose, freeDnaSeqList(), freeHash(), freeMem(), getTargetName(), gfQuerySeqTransTrans(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), gfOutput::includeTargetFile, lm, lmCleanup(), lmInit(), loadHashT3Ranges(), gfSeqSource::next, slRef::next, gfClump::next, PATH_LEN, rangeCoorTimes3(), rangeList, saveAlignments(), trans3::seq, seqClumpToRangeList(), dnaSeq::size, slCat(), slFreeList(), slReverse(), slSort(), ss, ssAliCount, ssBundleFree(), ssStitch(), gfClump::target, gfClump::tEnd, tileSize, trans3::trans, trans3Find(), trans3Free(), trans3New(), TRUE, gfClump::tStart, and untranslateRangeList().

Referenced by doBlat(), and gfClient().

01170 {
01171 struct gfClump *clumps[2][3][3], *clump;
01172 char targetName[PATH_LEN];
01173 int qFrame, tFrame, tIsRc;
01174 struct gfSeqSource *ssList = NULL, *ss;
01175 struct lm *lm = lmInit(0);
01176 int tileSize;
01177 struct gfRange *rangeList = NULL, *rl, *range;
01178 struct trans3 *qTrans = trans3New(qSeq), *t3;
01179 struct slRef *t3RefList = NULL, *t3Ref;
01180 struct hash *t3Hash = NULL;
01181 struct dnaSeq *tSeqList = NULL;
01182 enum ffStringency stringency = (isRna ? ffCdna : ffLoose);
01183 
01184 /* Query server for clumps. */
01185 gfQuerySeqTransTrans(*pConn, qSeq, clumps, lm, &ssList, &tileSize);
01186 close(*pConn);
01187 *pConn = -1;
01188 
01189 for (tIsRc=0; tIsRc <= 1; ++tIsRc)
01190     {
01191     /* Figure out which ranges need to be loaded and load them. */
01192     for (qFrame = 0; qFrame < 3; ++qFrame)
01193         {
01194         for (tFrame = 0; tFrame < 3; ++tFrame)
01195             {
01196             rl = seqClumpToRangeList(clumps[tIsRc][qFrame][tFrame], tFrame);
01197             rangeList = slCat(rangeList, rl);
01198             }
01199         }
01200     rangeCoorTimes3(rangeList);
01201     slSort(&rangeList, gfRangeCmpTarget);
01202     rangeList = gfRangesBundle(rangeList, ffIntronMax);
01203     loadHashT3Ranges(rangeList, tSeqDir, tFileCache,
01204         qSeq->size/3, tIsRc, &t3Hash, &tSeqList, &t3RefList);
01205 
01206     /* The old range list was not very precise - it was just to get
01207      * the DNA loaded.  */
01208     gfRangeFreeList(&rangeList);
01209 
01210     /* Patch up clump list and associated sequence source to refer
01211      * to bits of genome loaded into memory.  Create new range list
01212      * by extending hits in clumps. */
01213     for (qFrame = 0; qFrame < 3; ++qFrame)
01214         {
01215         for (tFrame = 0; tFrame < 3; ++tFrame)
01216             {
01217             for (clump = clumps[tIsRc][qFrame][tFrame]; clump != NULL; clump = clump->next)
01218                 {
01219                 struct gfSeqSource *ss = clump->target;
01220                 struct gfRange *rangeSet = NULL;
01221                 t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3);
01222                 ss->seq = t3->trans[tFrame];
01223                 ss->start = t3->start/3;
01224                 ss->end = t3->end/3;
01225                 clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, t3, &rangeSet, TRUE, FALSE);
01226                 untranslateRangeList(rangeSet, qFrame, tFrame, NULL, t3, t3->start);
01227                 rangeList = slCat(rangeSet, rangeList);
01228                 }
01229             }
01230         }
01231     slReverse(&rangeList);
01232     slSort(&rangeList, gfRangeCmpTarget);
01233     rangeList = gfRangesBundle(rangeList, ffIntronMax);
01234 
01235     for (range = rangeList; range != NULL; range = range->next)
01236         {
01237         struct dnaSeq *targetSeq = range->tSeq;
01238         struct ssBundle *bun;
01239 
01240         AllocVar(bun);
01241         bun->qSeq = qSeq;
01242         bun->genoSeq = targetSeq;
01243         bun->ffList = gfRangesToFfItem(range->components, qSeq);
01244         ssStitch(bun, stringency, minMatch, ssAliCount);
01245         getTargetName(range->tName, out->includeTargetFile, targetName);
01246         t3 = range->t3;
01247         saveAlignments(targetName, t3->nibSize, t3->start, 
01248             bun, NULL, qIsRc, tIsRc, stringency, minMatch, out);
01249         ssBundleFree(&bun);
01250         }
01251 
01252     /* Cleanup for this strand of database. */
01253     gfRangeFreeList(&rangeList);
01254     freeHash(&t3Hash);
01255     for (t3Ref = t3RefList; t3Ref != NULL; t3Ref = t3Ref->next)
01256         {
01257         struct trans3 *t3 = t3Ref->val;
01258         trans3Free(&t3);
01259         }
01260     slFreeList(&t3RefList);
01261     freeDnaSeqList(&tSeqList);
01262     }
01263 trans3Free(&qTrans);
01264 for (ss = ssList; ss != NULL; ss = ss->next)
01265     freeMem(ss->fileName);
01266 slFreeList(&ssList);
01267 lmCleanup(&lm);
01268 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfCatchPipes (  ) 

Definition at line 38 of file genoFind.c.

References gfPipeHandler().

Referenced by main().

00040 {
00041 signal(SIGPIPE, gfPipeHandler);
00042 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfCheckTileSize ( int  tileSize,
boolean  isPep 
)

Definition at line 97 of file genoFind.c.

References errAbort().

Referenced by gfNewEmpty(), and main().

00099 {
00100 if (isPep)
00101     {
00102     if (tileSize < 3 || tileSize > 8)
00103         errAbort("protein tileSize must be between 3 and 8");
00104     }
00105 else
00106     {
00107     if (tileSize < 6 || tileSize > 18)
00108         errAbort("DNA tileSize must be between 6 and 18");
00109     }
00110 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfClumpDump ( struct genoFind *  gf,
struct gfClump *  clump,
FILE *  f 
)

Definition at line 973 of file genoFind.c.

References gfClump::hitCount, gfClump::hitList, name, gfHit::next, gfClump::qEnd, gfClump::qStart, ss, gfClump::target, gfClump::tEnd, and gfClump::tStart.

Referenced by genoFindDirect(), and genoPcrDirect().

00975 {
00976 struct gfSeqSource *ss = clump->target;
00977 char *name = ss->fileName;
00978 
00979 if (name == NULL) name = ss->seq->name;
00980 fprintf(f, "%u-%u %s %u-%u, hits %d\n", 
00981         clump->qStart, clump->qEnd, name,
00982         clump->tStart - ss->start, clump->tEnd - ss->start,
00983         clump->hitCount);
00984 #ifdef SOMETIMES
00985 for (hit = clump->hitList; hit != NULL; hit = hit->next)
00986     fprintf(f, "   q %d, t %d, diag %d\n", hit->qStart, hit->tStart, hit->diagonal);
00987 #endif
00988 }

Here is the caller graph for this function:

void gfClumpFree ( struct gfClump **  pClump  ) 

Definition at line 951 of file genoFind.c.

References freez().

Referenced by clumpNear(), gfClumpFreeList(), and targetClump().

00953 {
00954 struct gfClump *clump;
00955 if ((clump = *pClump) == NULL)
00956     return;
00957 freez(pClump);
00958 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfClumpFreeList ( struct gfClump **  pList  ) 

Definition at line 960 of file genoFind.c.

References gfClumpFree(), and gfClump::next.

Referenced by dnaQuery(), ffSeedExtInMem(), genoFindDirect(), gfAlignTrans(), gfFindAlignAaTrans(), gfTransTransFindBundles(), pcrQuery(), searchOneProt(), transQuery(), and transTransQuery().

00962 {
00963 struct gfClump *el, *next;
00964 
00965 for (el = *pList; el != NULL; el = next)
00966     {
00967     next = el->next;
00968     gfClumpFree(&el);
00969     }
00970 *pList = NULL;
00971 }

Here is the call graph for this function:

Here is the caller graph for this function:

int gfConnect ( char *  hostName,
char *  portName 
)

Definition at line 10 of file gfNet.c.

References errnoAbort(), and netConnect().

Referenced by doBlat(), gfClient(), and gfPcrGetRanges().

00012 {
00013 /* Connect to server. */
00014 int sd = netConnect(hostName, atoi(portName));
00015 if (sd < 0)
00016     {
00017     errnoAbort("Sorry, the BLAT/iPCR server seems to be down.  Please try "
00018                "again later.");
00019     }
00020 return sd;
00021 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfFileCacheFree ( struct hash **  pCache  ) 

Definition at line 526 of file gfBlatLib.c.

References gfFileCacheFreeEl(), hashFree, and hashTraverseEls().

Referenced by gfClient(), and gfPcrViaNet().

00528 {
00529 struct hash *cache = *pCache;
00530 if (cache != NULL)
00531     {
00532     hashTraverseEls(cache, gfFileCacheFreeEl);
00533     hashFree(pCache);
00534     }
00535 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct hash* gfFileCacheNew (  )  [read]

Definition at line 503 of file gfBlatLib.c.

References hashNew.

Referenced by doBlat(), gfClient(), and gfPcrViaNet().

00505 {
00506 return hashNew(0);
00507 }

Here is the caller graph for this function:

void gfFindAlignAaTrans ( struct genoFind *  gfs[3],
aaSeq *  qSeq,
struct hash *  t3Hash,
boolean  tIsRc,
int  minMatch,
struct gfOutput *  out 
)

Definition at line 938 of file gfBlatLib.c.

References AllocVar, clumpToHspRange(), FALSE, ffCdna, ffIntronMax, ssBundle::ffList, ssBundle::genoSeq, gfClumpFreeList(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), gfTransFindClumps(), hashMustFindVal(), ssBundle::isProt, lm, lmCleanup(), lmInit(), dnaSeq::name, gfClump::next, ssBundle::qSeq, rangeList, saveAlignments(), trans3::seq, dnaSeq::size, slReverse(), slSort(), ssAliCount, ssBundleFree(), ssStitch(), ssBundle::t3List, genoFind::tileSize, tileSize, and TRUE.

Referenced by tripleSearch().

00942 {
00943 struct gfClump *clumps[3];
00944 int frame;
00945 struct gfClump *clump;
00946 struct gfRange *rangeList = NULL, *range;
00947 aaSeq *targetSeq;
00948 struct ssBundle *bun;
00949 int tileSize = gfs[0]->tileSize;
00950 struct trans3 *t3;
00951 int hitCount;
00952 struct lm *lm = lmInit(0);
00953 
00954 gfTransFindClumps(gfs, qSeq, clumps, lm, &hitCount);
00955 for (frame=0; frame<3; ++frame)
00956     {
00957     for (clump = clumps[frame]; clump != NULL; clump = clump->next)
00958         {
00959         clumpToHspRange(clump, qSeq, tileSize, frame, NULL, &rangeList, TRUE, FALSE);
00960         }
00961     }
00962 slReverse(&rangeList);
00963 slSort(&rangeList, gfRangeCmpTarget);
00964 rangeList = gfRangesBundle(rangeList, ffIntronMax/3);
00965 for (range = rangeList; range != NULL; range = range->next)
00966     {
00967     targetSeq = range->tSeq;
00968     t3 = hashMustFindVal(t3Hash, targetSeq->name);
00969     AllocVar(bun);
00970     bun->qSeq = qSeq;
00971     bun->genoSeq = targetSeq;
00972     bun->ffList = gfRangesToFfItem(range->components, qSeq);
00973     bun->isProt = TRUE;
00974     bun->t3List = t3;
00975     ssStitch(bun, ffCdna, minMatch, ssAliCount);
00976     saveAlignments(targetSeq->name, t3->seq->size, 0, 
00977         bun, t3Hash, FALSE, tIsRc, ffCdna, minMatch, out);
00978     ssBundleFree(&bun);
00979     }
00980 gfRangeFreeList(&rangeList);
00981 for (frame=0; frame<3; ++frame)
00982     gfClumpFreeList(&clumps[frame]);
00983 lmCleanup(&lm);
00984 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfClump* gfFindClumps ( struct genoFind *  gf,
struct dnaSeq *  seq,
struct lm *  lm,
int *  retHitCount 
) [read]

Definition at line 1890 of file genoFind.c.

References gfFindClumpsWithQmask(), and lm.

Referenced by dnaQuery(), genoFindDirect(), gfTransFindClumps(), and searchOneProt().

01892 {
01893 return gfFindClumpsWithQmask(gf, seq, NULL, 0, lm, retHitCount);
01894 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfClump* gfFindClumpsWithQmask ( struct genoFind *  gf,
bioSeq *  seq,
Bits *  qMaskBits,
int  qMaskOffset,
struct lm *  lm,
int *  retHitCount 
) [read]

Definition at line 1849 of file genoFind.c.

References clumpHits(), cmpQuerySize, gfFindHitsWithQmask(), lm, genoFind::minMatch, minMatch, dnaSeq::size, and genoFind::tileSize.

Referenced by ffSeedExtInMem(), and gfFindClumps().

01853 {
01854 struct gfClump *clumpList = NULL;
01855 struct gfHit *hitList;
01856 int minMatch = gf->minMatch;
01857 
01858 #ifdef OLD      /* stepSize makes this obsolete. */
01859 if (seq->size < gf->tileSize * (gf->minMatch+1))
01860      minMatch = 1;
01861 #endif /* OLD */
01862 
01863 hitList =  gfFindHitsWithQmask(gf, seq, qMaskBits, qMaskOffset, lm,
01864         retHitCount, NULL, 0, 0);
01865 cmpQuerySize = seq->size;
01866 clumpList = clumpHits(gf, hitList, minMatch);
01867 return clumpList;
01868 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfHit* gfFindHitsInRegion ( struct genoFind *  gf,
bioSeq *  seq,
Bits *  qMaskBits,
int  qMaskOffset,
struct lm *  lm,
struct gfSeqSource *  target,
int  tMin,
int  tMax 
) [read]

Definition at line 1870 of file genoFind.c.

References gfFindHitsWithQmask(), lm, gfHit::next, gfSeqSource::start, and gfHit::tStart.

Referenced by scanIndexForSmallExons().

01877 {
01878 int targetStart;
01879 struct gfHit *hitList, *hit;
01880 int hitCount;
01881 
01882 targetStart = target->start;
01883 hitList =  gfFindHitsWithQmask(gf, seq, qMaskBits, qMaskOffset, lm,
01884         &hitCount, target, tMin + targetStart, tMax + targetStart);
01885 for (hit = hitList; hit != NULL; hit = hit->next)
01886     hit->tStart -= targetStart;
01887 return hitList;
01888 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfSeqSource* gfFindNamedSource ( struct genoFind *  gf,
char *  name 
) [read]

Definition at line 2010 of file genoFind.c.

References gfSeqSource::fileName, dnaSeq::name, sameString, gfSeqSource::seq, genoFind::sourceCount, genoFind::sources, and splitPath().

Referenced by refineBundle().

02012 {
02013 struct gfSeqSource *source = gf->sources;
02014 int count = gf->sourceCount;
02015 
02016 if (source->seq == NULL)        /* Use first source to see if seq or file. */
02017     {
02018     char rootName[256];
02019     while (--count >= 0)
02020         {
02021         splitPath(source->fileName, NULL, rootName, NULL);
02022         if (sameString(name, rootName))
02023              return source;
02024         }
02025     }
02026 else
02027     {
02028     while (--count >= 0)
02029         {
02030         if (sameString(source->seq->name, name))
02031             return source;
02032         source += 1;
02033         }
02034     }
02035 return NULL;
02036 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct genoFind* gfIndexNibsAndTwoBits ( int  fileCount,
char *  fileNames[],
int  minMatch,
int  maxGap,
int  tileSize,
int  maxPat,
char *  oocFile,
boolean  allowOneMismatch,
int  stepSize 
) [read]

Definition at line 514 of file genoFind.c.

References AllocArray, baseCount, bits32, cloneString(), dnaSeqFree, errAbort(), FALSE, gfAddSeq(), gfAddTilesInNib(), gfAllocLists(), gfCountTilesInNib(), gfCountTilesInTwoBit(), gfNewEmpty(), gfZeroNonOverused(), gfZeroOverused(), twoBitFile::indexList, maxTotalBases(), twoBitIndex::name, twoBitIndex::next, nibIsFile(), PATH_LEN, safef(), dnaSeq::size, genoFind::sourceCount, genoFind::sources, ss, genoFind::tileSize, genoFind::totalSeqSize, twoBitClose(), twoBitIsFile(), twoBitOpen(), and twoBitReadSeqFragLower().

Referenced by genoFindDirect(), genoPcrDirect(), and startServer().

00525 {
00526 struct genoFind *gf = gfNewEmpty(minMatch, maxGap, tileSize, stepSize,
00527         maxPat, oocFile, FALSE, allowOneMismatch);
00528 int i;
00529 bits32 offset = 0, nibSize;
00530 char *fileName;
00531 struct gfSeqSource *ss;
00532 long long totalBases = 0, warnAt = maxTotalBases();
00533 int totalSeq = 0;
00534 
00535 if (allowOneMismatch)
00536     errAbort("Don't currently support allowOneMismatch in gfIndexNibsAndTwoBits");
00537 if (stepSize == 0)
00538     stepSize = gf->tileSize;
00539 for (i=0; i<fileCount; ++i)
00540     {
00541     fileName = fileNames[i];
00542     if (twoBitIsFile(fileName))
00543         {
00544         int seqCount;
00545         long long baseCount;
00546         gfCountTilesInTwoBit(gf, stepSize, fileName, &seqCount, &baseCount);
00547         totalBases += baseCount;
00548         totalSeq += seqCount;
00549         }
00550     else if (nibIsFile(fileName))
00551         {
00552         totalBases += gfCountTilesInNib(gf, stepSize, fileName);
00553         totalSeq += 1;
00554         }
00555     else
00556         errAbort("Unrecognized file type %s", fileName);
00557     /* Warn if they exceed 4 gig. */
00558     if (totalBases >= warnAt)
00559         errAbort("Exceeding 4 billion bases, sorry gfServer can't handle that.");
00560     }
00561 gfAllocLists(gf);
00562 gfZeroNonOverused(gf);
00563 AllocArray(gf->sources, totalSeq);
00564 gf->sourceCount = totalSeq;
00565 ss = gf->sources;
00566 for (i=0; i<fileCount; ++i)
00567     {
00568     fileName = fileNames[i];
00569     if (nibIsFile(fileName))
00570         {
00571         nibSize = gfAddTilesInNib(gf, fileName, offset, stepSize);
00572         ss->fileName = fileName;
00573         ss->start = offset;
00574         offset += nibSize;
00575         ss->end = offset;
00576         ++ss;
00577         }
00578     else
00579         {
00580         struct twoBitFile *tbf = twoBitOpen(fileName);
00581         struct twoBitIndex *index;
00582         char nameBuf[PATH_LEN+256];
00583         for (index = tbf->indexList; index != NULL; index = index->next)
00584             {
00585             struct dnaSeq *seq = twoBitReadSeqFragLower(tbf, index->name, 0,0);
00586             gfAddSeq(gf, seq, offset);
00587             safef(nameBuf, sizeof(nameBuf), "%s:%s", fileName, index->name);
00588             ss->fileName = cloneString(nameBuf);
00589             ss->start = offset;
00590             offset += seq->size;
00591             ss->end = offset;
00592             ++ss;
00593             dnaSeqFree(&seq);
00594             }
00595         twoBitClose(&tbf);
00596         }
00597     }
00598 gf->totalSeqSize = offset;
00599 gfZeroOverused(gf);
00600 printf("Done adding\n");
00601 return gf;
00602 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct genoFind* gfIndexSeq ( bioSeq *  seqList,
int  minMatch,
int  maxGap,
int  tileSize,
int  maxPat,
char *  oocFile,
boolean  isPep,
boolean  allowOneMismatch,
boolean  maskUpper,
int  stepSize 
) [read]

Definition at line 907 of file genoFind.c.

References gfLargeIndexSeq(), gfNewEmpty(), gfSmallIndexSeq(), and genoFind::segSize.

Referenced by bigBlat(), and blat().

00913 {
00914 struct genoFind *gf = gfNewEmpty(minMatch, maxGap, tileSize, stepSize, maxPat, 
00915                                 oocFile, isPep, allowOneMismatch);
00916 if (stepSize == 0)
00917     stepSize = tileSize;
00918 if (gf->segSize > 0)
00919     {
00920     gfLargeIndexSeq(gf, seqList, minMatch, maxGap, tileSize, maxPat, oocFile, isPep, maskUpper);
00921     }
00922 else
00923     {
00924     gfSmallIndexSeq(gf, seqList, minMatch, maxGap, tileSize, maxPat, oocFile, isPep, maskUpper);
00925     }
00926 return gf;
00927 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfIndexTransNibsAndTwoBits ( struct genoFind *  transGf[2][3],
int  fileCount,
char *  fileNames[],
int  minMatch,
int  maxGap,
int  tileSize,
int  maxPat,
char *  oocFile,
boolean  allowOneMismatch,
boolean  mask,
int  stepSize 
)

Definition at line 717 of file genoFind.c.

References AllocArray, bits32, errAbort(), freeDnaSeq(), gfAllocLists(), gfNewEmpty(), gfZeroNonOverused(), gfZeroOverused(), twoBitFile::indexList, maskSimplePepRepeat(), maxTotalBases(), twoBitIndex::name, twoBitIndex::next, nibIsFile(), PATH_LEN, readMaskedNib(), readMaskedTwoBit(), safef(), dnaSeq::size, genoFind::totalSeqSize, transCountBothStrands(), transIndexBothStrands(), TRUE, twoBitCheckTotalSize(), twoBitClose(), twoBitIsFile(), and twoBitOpen().

Referenced by startServer().

00722 {
00723 struct genoFind *gf;
00724 int i,isRc, frame;
00725 bits32 offset[2][3];
00726 char *fileName;
00727 struct dnaSeq *seq;
00728 int sourceCount = 0;
00729 long long totalBases = 0, warnAt = maxTotalBases();
00730 
00731 if (allowOneMismatch)
00732     errAbort("Don't currently support allowOneMismatch in gfIndexTransNibsAndTwoBits");
00733 /* Allocate indices for all reading frames. */
00734 for (isRc=0; isRc <= 1; ++isRc)
00735     {
00736     for (frame = 0; frame < 3; ++frame)
00737         {
00738         transGf[isRc][frame] = gf = gfNewEmpty(minMatch, maxGap, 
00739                 tileSize, stepSize, maxPat, oocFile, TRUE, allowOneMismatch);
00740         }
00741     }
00742 
00743 /* Mask simple AA repeats (of period 1 and 2). */
00744 for (isRc = 0; isRc <= 1; ++isRc)
00745     for (frame = 0; frame < 3; ++frame)
00746         maskSimplePepRepeat(transGf[isRc][frame]);
00747 
00748 /* Scan through .nib and .2bit files once counting tiles. */
00749 for (i=0; i<fileCount; ++i)
00750     {
00751     fileName = fileNames[i];
00752     printf("Counting %s\n", fileName);
00753     if (nibIsFile(fileName))
00754         {
00755         seq = readMaskedNib(fileName, doMask);
00756         transCountBothStrands(seq, transGf);
00757         sourceCount += 1;
00758         totalBases += seq->size;
00759         freeDnaSeq(&seq);
00760         }
00761     else if (twoBitIsFile(fileName))
00762         {
00763         struct twoBitFile *tbf = twoBitOpen(fileName);
00764         struct twoBitIndex *index;
00765         totalBases += twoBitCheckTotalSize(tbf);
00766 
00767         for (index = tbf->indexList; index != NULL; index = index->next)
00768             {
00769             seq = readMaskedTwoBit(tbf, index->name, doMask);
00770             transCountBothStrands(seq, transGf);
00771             sourceCount += 1;
00772             freeDnaSeq(&seq);
00773             }
00774         twoBitClose(&tbf);
00775         }
00776     else 
00777         errAbort("Unrecognized file type %s", fileName);
00778     if (totalBases >= warnAt)
00779         errAbort("Exceeding 4 billion bases, sorry gfServer can't handle that.");
00780     }
00781 
00782 /* Get space for entries in indexes of all reading frames. */
00783 for (isRc=0; isRc <= 1; ++isRc)
00784     {
00785     for (frame = 0; frame < 3; ++frame)
00786         {
00787         gf = transGf[isRc][frame];
00788         gfAllocLists(gf);
00789         gfZeroNonOverused(gf);
00790         AllocArray(gf->sources, sourceCount);
00791         gf->sourceCount = sourceCount;
00792         offset[isRc][frame] = 0;
00793         }
00794     }
00795 
00796 /* Scan through .nib and .2bit files a second time building index. */
00797 sourceCount = 0;
00798 for (i=0; i<fileCount; ++i)
00799     {
00800     fileName = fileNames[i];
00801     printf("Indexing %s\n", fileName);
00802     if (nibIsFile(fileName))
00803         {
00804         seq = readMaskedNib(fileName, doMask);
00805         transIndexBothStrands(seq, transGf, offset, sourceCount, fileName);
00806         freeDnaSeq(&seq);
00807         sourceCount += 1;
00808         }
00809     else        /* .2bit file */
00810         {
00811         struct twoBitFile *tbf = twoBitOpen(fileName);
00812         struct twoBitIndex *index;
00813         for (index = tbf->indexList; index != NULL; index = index->next)
00814             {
00815             char nameBuf[PATH_LEN+256];
00816             safef(nameBuf, sizeof(nameBuf), "%s:%s", fileName, index->name);
00817             seq = readMaskedTwoBit(tbf, index->name, doMask);
00818             transIndexBothStrands(seq, transGf, offset, sourceCount, nameBuf);
00819             sourceCount += 1;
00820             freeDnaSeq(&seq);
00821             }
00822         twoBitClose(&tbf);
00823         }
00824     }
00825 
00826 for (isRc=0; isRc <= 1; ++isRc)
00827     {
00828     for (frame = 0; frame < 3; ++frame)
00829         {
00830         gf = transGf[isRc][frame];
00831         gf->totalSeqSize = offset[isRc][frame];
00832         gfZeroOverused(gf);
00833         }
00834     }
00835 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfLongDnaInMem ( struct dnaSeq *  query,
struct genoFind gf,
boolean  isRc,
int  minScore,
Bits qMaskBits,
struct gfOutput out,
boolean  fastMap,
boolean  band 
)

Definition at line 1478 of file gfBlatLib.c.

References lmInit(), newHash(), rangeList, and dnaSeq::size.

Referenced by searchOneStrand().

01483 {
01484 int hitCount;
01485 int maxSize = 5000;
01486 int preferredSize = 4500;
01487 int overlapSize = 250;
01488 struct dnaSeq subQuery = *query;
01489 struct lm *lm = lmInit(0);
01490 int subOffset, subSize, nextOffset;
01491 DNA saveEnd, *endPos;
01492 struct ssBundle *oneBunList = NULL, *bigBunList = NULL, *bun;
01493 struct hash *bunHash = newHash(8);
01494 
01495 for (subOffset = 0; subOffset<query->size; subOffset = nextOffset)
01496     {
01497     struct gfClump *clumpList;
01498     struct gfRange *rangeList = NULL;
01499 
01500     /* Figure out size of this piece.  If query is
01501      * maxSize or less do it all.   Otherwise just
01502      * do preferred size, and set it up to overlap
01503      * with surrounding pieces by overlapSize.  */
01504     if (subOffset == 0 && query->size <= maxSize)
01505         nextOffset = subSize = query->size;
01506     else
01507         {
01508         subSize = preferredSize;
01509         if (subSize + subOffset >= query->size)
01510             {
01511             subSize = query->size - subOffset;
01512             nextOffset = query->size;
01513             }
01514         else
01515             {
01516             nextOffset = subOffset + preferredSize - overlapSize;
01517             }
01518         }
01519     subQuery.dna = query->dna + subOffset;
01520     subQuery.size = subSize;
01521     endPos = &subQuery.dna[subSize];
01522     saveEnd = *endPos;
01523     *endPos = 0;
01524     if (band)
01525         {
01526         oneBunList = ffSeedExtInMem(gf, &subQuery, qMaskBits, subOffset, lm, minScore, isRc);
01527         }
01528     else
01529         {
01530         clumpList = gfFindClumpsWithQmask(gf, &subQuery, qMaskBits, subOffset, lm, &hitCount);
01531         if (fastMap)
01532             {
01533             oneBunList = fastMapClumpsToBundles(gf, clumpList, &subQuery);
01534             }
01535         else
01536             {
01537             oneBunList = gfClumpsToBundles(clumpList, isRc, &subQuery, minScore, &rangeList);
01538             gfRangeFreeList(&rangeList);
01539             }
01540         gfClumpFreeList(&clumpList);
01541         }
01542     addToBigBundleList(&oneBunList, bunHash, &bigBunList, query);
01543     *endPos = saveEnd;
01544     }
01545 #ifdef DEBUG
01546 dumpBunList(bigBunList);
01547 #endif /* DEBUG */
01548 for (bun = bigBunList; bun != NULL; bun = bun->next)
01549     {
01550     ssStitch(bun, ffCdna, minScore, ssAliCount);
01551     if (!fastMap && !band)
01552         refineSmallExonsInBundle(bun);
01553     saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 
01554         bun, NULL, isRc, FALSE, ffCdna, minScore, out);
01555     }
01556 ssBundleFreeList(&bigBunList);
01557 freeHash(&bunHash);
01558 lmCleanup(&lm);
01559 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfLongTransTransInMem ( struct dnaSeq *  query,
struct genoFind gfs[3],
struct hash t3Hash,
boolean  qIsRc,
boolean  tIsRc,
boolean  qIsRna,
int  minScore,
struct gfOutput out 
)

Definition at line 1562 of file gfBlatLib.c.

References ffCdna, ffLoose, newHash(), and dnaSeq::size.

Referenced by transTripleSearch().

01567 {
01568 enum ffStringency stringency = (qIsRna ? ffCdna : ffLoose);
01569 int maxSize = 1500;
01570 int preferredSize = 1200;       /* PreferredSize - overlapSize might need to be multiple of 3. */
01571 int overlapSize = 270;
01572 struct dnaSeq subQuery = *query;
01573 int subOffset, subSize, nextOffset;
01574 DNA saveEnd, *endPos;
01575 struct ssBundle *oneBunList = NULL, *bigBunList = NULL, *bun;
01576 struct hash *bunHash = newHash(8);
01577 
01578 for (subOffset = 0; subOffset<query->size; subOffset = nextOffset)
01579     {
01580     /* Figure out size of this piece.  If query is
01581      * maxSize or less do it all.   Otherwise just
01582      * do preferred size, and set it up to overlap
01583      * with surrounding pieces by overlapSize.  */
01584     if (subOffset == 0 && query->size <= maxSize)
01585         nextOffset = subSize = query->size;
01586     else
01587         {
01588         subSize = preferredSize;
01589         if (subSize + subOffset >= query->size)
01590             {
01591             subSize = query->size - subOffset;
01592             nextOffset = query->size;
01593             }
01594         else
01595             {
01596             nextOffset = subOffset + preferredSize - overlapSize;
01597             }
01598         }
01599     subQuery.dna = query->dna + subOffset;
01600     subQuery.size = subSize;
01601     endPos = &subQuery.dna[subSize];
01602     saveEnd = *endPos;
01603     *endPos = 0;
01604     oneBunList = gfTransTransFindBundles(gfs, &subQuery, t3Hash, qIsRc, minScore, qIsRna);
01605     addToBigBundleList(&oneBunList, bunHash, &bigBunList, query);
01606     *endPos = saveEnd;
01607     }
01608 for (bun = bigBunList; bun != NULL; bun = bun->next)
01609     {
01610     ssStitch(bun, ffCdna, minScore, ssAliCount);
01611     saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 
01612         bun, NULL, qIsRc, tIsRc, stringency, minScore, out);
01613     }
01614 hashFree(&bunHash);
01615 ssBundleFreeList(&bigBunList);
01616 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfMakeOoc ( char *  outName,
char *  files[],
int  fileCount,
int  tileSize,
bits32  maxPat,
enum gfType  tType 
)

Definition at line 1928 of file genoFind.c.

References bits32, carefulClose(), dnaSeq::dna, errAbort(), FALSE, faReadAllSeq(), freeDnaSeqList(), genoFindFree(), gfCountSeq(), gfMaxGap, gfMinMatch, gfNewEmpty(), gftDnaX, gftProt, gftRnaX, genoFind::listSizes, mustOpen(), dnaSeq::next, nibIsFile(), nibLoadAll(), oocSig, reverseComplement(), genoFind::segSize, dnaSeq::size, genoFind::tileSpaceSize, toLowerN(), trans3Free(), trans3New(), twoBitIsFile(), twoBitLoadAll(), and writeOne.

Referenced by blat().

01931 {
01932 boolean dbIsPep = (tType == gftProt || tType == gftDnaX || tType == gftRnaX);
01933 struct genoFind *gf = gfNewEmpty(gfMinMatch, gfMaxGap, tileSize, tileSize,
01934         maxPat, NULL, dbIsPep, FALSE);
01935 bits32 *sizes = gf->listSizes;
01936 int tileSpaceSize = gf->tileSpaceSize;
01937 bioSeq *seq, *seqList;
01938 bits32 sig = oocSig, psz = tileSize;
01939 bits32 i;
01940 int oocCount = 0;
01941 char *inName;
01942 FILE *f = mustOpen(outName, "w");
01943 
01944 if (gf->segSize > 0)
01945     errAbort("Don't yet know how to make ooc files for large tile sizes.");
01946 for (i=0; i<fileCount; ++i)
01947     {
01948     inName = files[i];
01949     printf("Loading %s\n", inName);
01950     if (nibIsFile(inName))
01951         {
01952         seqList = nibLoadAll(inName);
01953         }
01954     else if (twoBitIsFile(inName))
01955         {
01956         seqList = twoBitLoadAll(inName);
01957         for (seq = seqList; seq != NULL; seq = seq->next)
01958             toLowerN(seq->dna, seq->size);
01959         }
01960     else
01961         {
01962         seqList = faReadAllSeq(inName, tType != gftProt);
01963         }
01964     printf("Counting %s\n", inName);
01965     for (seq = seqList; seq != NULL; seq = seq->next)
01966         {
01967         int isRc;
01968         for (isRc = 0; isRc <= 1; ++isRc)
01969             {
01970             if (tType == gftDnaX || tType == gftRnaX)
01971                 {
01972                 struct trans3 *t3 = trans3New(seq);
01973                 int frame;
01974                 for (frame=0; frame<3; ++frame)
01975                     {
01976                     gfCountSeq(gf, t3->trans[frame]);
01977                     }
01978                 trans3Free(&t3);
01979                 }
01980             else
01981                 {
01982                 gfCountSeq(gf, seq);
01983                 }
01984             if (tType == gftProt || tType == gftRnaX)
01985                 break;
01986             else 
01987                 {
01988                 reverseComplement(seq->dna, seq->size);
01989                 }
01990             }
01991         }
01992     freeDnaSeqList(&seqList);
01993     }
01994 printf("Writing %s\n", outName);
01995 writeOne(f, sig);
01996 writeOne(f, psz);
01997 for (i=0; i<tileSpaceSize; ++i)
01998     {
01999     if (sizes[i] >= maxPat)
02000         {
02001         writeOne(f, i);
02002         ++oocCount;
02003         }
02004     }
02005 carefulClose(&f);
02006 genoFindFree(&gf);
02007 printf("Wrote %d overused %d-mers to %s\n", oocCount, tileSize, outName);
02008 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfOutput* gfOutputAny ( char *  format,
int  goodPpt,
boolean  qIsProt,
boolean  tIsProt,
boolean  noHead,
char *  databaseName,
int  databaseSeqCount,
double  databaseLetters,
double  minIdentity,
FILE *  f 
) [read]

Definition at line 508 of file gfOut.c.

References ArraySize, errAbort(), FALSE, gfOutputAxt(), gfOutputBlast(), gfOutputMaf(), gfOutputPsl(), gfOutputSim4(), gfOutput::out, sameWord, stringArrayIx(), and TRUE.

Referenced by blat(), and gfClient().

00515 /* Parameters:
00516  *    format - either 'psl', 'pslx', 'sim4', 'blast', 'wublast', 'blast8', 'blast9', 'xml', 'axt', 'maf'
00517  *    goodPpt - minimum identity of alignments to output in parts per thousand
00518  *    qIsProt - true if query side is a protein.
00519  *    tIsProt - true if target (database) side is a protein.
00520  *    noHead - if true suppress header in psl/pslx output.
00521  *    databaseName - name of database.  Passed only to blast and sim4 output
00522  *    databaseSeqCount - number of sequences in database - only for blast
00523  *    databaseLetters - number of bases/aas in database - only blast
00524  *    minIdentity - minimum identity - only blast
00525  *    FILE *f - file.  
00526  */
00527 {
00528 struct gfOutput *out = NULL;
00529 static char *blastTypes[] = {"blast", "wublast", "blast8", "blast9", "xml"};
00530 
00531 if (format == NULL)
00532     format = "psl";
00533 if (sameWord(format, "psl"))
00534     out = gfOutputPsl(goodPpt, qIsProt, tIsProt, f, FALSE, noHead);
00535 else if (sameWord(format, "pslx"))
00536     out = gfOutputPsl(goodPpt, qIsProt, tIsProt, f, TRUE, noHead);
00537 else if (sameWord(format, "sim4"))
00538     out = gfOutputSim4(goodPpt, qIsProt, tIsProt, databaseName);
00539 else if (stringArrayIx(format, blastTypes, ArraySize(blastTypes)) >= 0)
00540     out = gfOutputBlast(goodPpt, qIsProt, tIsProt, 
00541             databaseName, databaseSeqCount, databaseLetters, format, 
00542             minIdentity, f);
00543 else if (sameWord(format, "axt"))
00544     out = gfOutputAxt(goodPpt, qIsProt, tIsProt, f);
00545 else if (sameWord(format, "maf"))
00546     out = gfOutputMaf(goodPpt, qIsProt, tIsProt, f);
00547 else
00548     errAbort("Unrecognized output format '%s'", format);
00549 return out;
00550 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfOutput* gfOutputAxt ( int  goodPpt,
boolean  qIsProt,
boolean  tIsProt,
FILE *  f 
) [read]

Definition at line 459 of file gfOut.c.

References axtQueryOut(), gfOutputAxtMem(), and gfOutput::out.

Referenced by gfOutputAny().

00462 {
00463 struct gfOutput *out = gfOutputAxtMem(goodPpt, qIsProt, tIsProt);
00464 out->queryOut = axtQueryOut;
00465 return out;
00466 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfOutput* gfOutputAxtMem ( int  goodPpt,
boolean  qIsProt,
boolean  tIsProt 
) [read]

Definition at line 447 of file gfOut.c.

References AllocVar, gfOutput::data, gfOutputInit(), gfOutput::out, and saveAxtBundle().

Referenced by gfOutputAxt(), gfOutputBlast(), gfOutputMaf(), and gfOutputSim4().

00450 {
00451 struct gfOutput *out = gfOutputInit(goodPpt, qIsProt, tIsProt);
00452 struct axtData *ad;
00453 AllocVar(ad);
00454 out->out = saveAxtBundle;
00455 out->data = ad;
00456 return out;
00457 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gfOutput* gfOutputBlast ( int  goodPpt,
boolean  qIsProt,
boolean  tIsProt,
char *  databaseName,
int  databaseSeqCount,
double  databaseLetters,
char *  blastType,
double  minIdentity,
FILE *  f 
) [read]

Definition at line 491 of file gfOut.c.

References blastQueryOut(), axtData::blastType, gfOutput::data, axtData::databaseLetters, axtData::databaseName, axtData::databaseSeqCount, gfOutputAxtMem(), axtData::minIdentity, gfOutput::out, and gfOutput::queryOut.

Referenced by gfOutputAny().

00496 {
00497 struct gfOutput *out = gfOutputAxtMem(goodPpt, qIsProt, tIsProt);
00498 struct axtData *ad = out->data;
00499 ad->databaseName = databaseName;
00500 ad->databaseSeqCount = databaseSeqCount;
00501 ad->databaseLetters = databaseLetters;
00502 ad->blastType = blastType;
00503 ad->minIdentity = minIdentity;
00504 out->queryOut = blastQueryOut;
00505 return out;
00506 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfOutputFree ( struct gfOutput **  pOut  ) 

Definition at line 567 of file gfOut.c.

References freeMem(), freez(), and gfOutput::out.

00569 {
00570 struct gfOutput *out = *pOut;
00571 if (out != NULL)
00572     {
00573     freeMem(out->data);
00574     freez(pOut);
00575     }
00576 }

Here is the call graph for this function:

void gfOutputHead ( struct gfOutput *  out,
FILE *  f 
)

Definition at line 560 of file gfOut.c.

References gfOutput::out.

Referenced by gfClient(), and searchOneIndex().

00562 {
00563 if (out->fileHead != NULL)
00564     out->fileHead(out, f);
00565 }

Here is the caller graph for this function:

struct gfOutput* gfOutputPsl ( int  goodPpt,
boolean  qIsProt,
boolean  tIsProt,
FILE *  f,
boolean  saveSeq,
boolean  noHead 
) [read]

Definition at line 429 of file gfOut.c.

References AllocVar, gfOutput::data, pslxData::f, gfOutput::fileHead, gfOutputInit(), gfOutput::out, pslHead(), pslOut(), and pslxData::saveSeq.

Referenced by doBlat(), and gfOutputAny().

00433 {
00434 struct gfOutput *out = gfOutputInit(goodPpt, qIsProt, tIsProt);
00435 struct pslxData *pslData;
00436 
00437 AllocVar(pslData);
00438 pslData->saveSeq = saveSeq;
00439 pslData->f = f;
00440 out->out = pslOut;
00441 out->data = pslData;
00442 if (!noHead)
00443     out->fileHead = pslHead;
00444 return out;
00445 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfOutputQuery ( struct gfOutput *  out,
FILE *  f 
)

Definition at line 552 of file gfOut.c.

References gfOutput::out.

Referenced by bigBlat(), doBlat(), gfClient(), and searchOne().

00554 {
00555 ++out->queryIx;
00556 if (out->queryOut != NULL)
00557     out->queryOut(out, f);
00558 }

Here is the caller graph for this function:

struct gfClump* gfPcrClumps ( struct genoFind *  gf,
char *  fPrimer,
int  fPrimerSize,
char *  rPrimer,
int  rPrimerSize,
int  minDistance,
int  maxDistance 
) [read]

Definition at line 2155 of file genoFind.c.

References errAbort(), genoFind::isPep, pcrClumps(), reverseComplement(), genoFind::segSize, and tolowers().

Referenced by genoPcrDirect(), and pcrQuery().

02158 {
02159 struct gfClump *clumpList;
02160 if (gf->segSize > 0)
02161     errAbort("Can't do PCR on large tile sizes");
02162 if (gf->isPep)
02163     errAbort("Can't do PCR on protein/translated index");
02164 tolowers(fPrimer);
02165 tolowers(rPrimer);
02166 reverseComplement(rPrimer, rPrimerSize);
02167 clumpList = pcrClumps(gf, fPrimer, fPrimerSize, rPrimer, rPrimerSize, 
02168         minDistance, maxDistance);
02169 reverseComplement(rPrimer, rPrimerSize);
02170 return clumpList;
02171 }

Here is the call graph for this function:

Here is the caller graph for this function:

int gfReadMulti ( int  sd,
void *  vBuf,
size_t  size 
)

Definition at line 44 of file genoFind.c.

Referenced by startServer().

00046 {
00047 char *buf = vBuf;
00048 size_t totalRead = 0;
00049 int oneRead;
00050 
00051 while (totalRead < size)
00052     {
00053     oneRead = read(sd, buf + totalRead, size - totalRead);
00054     if (oneRead < 0)
00055         {
00056         perror("Couldn't finish large read");
00057         return oneRead;
00058         }
00059     else if (oneRead == 0)
00060     /* Avoid an infinite loop when the client closed the socket. */
00061         break;
00062     totalRead += oneRead;
00063     }
00064 return totalRead;
00065 }

Here is the caller graph for this function:

char* gfSignature (  ) 

Definition at line 22 of file genoFind.c.

Referenced by getFileList(), gfPcrGetRanges(), pcrServer(), queryServer(), startSeqQuery(), startServer(), statusServer(), and stopServer().

00025 {
00026 static char signature[] = "0ddf270562684f29";
00027 return signature;
00028 }

Here is the caller graph for this function:

void gfTransFindClumps ( struct genoFind gfs[3],
aaSeq seq,
struct gfClump clumps[3],
struct lm lm,
int *  retHitCount 
)

Definition at line 1897 of file genoFind.c.

References gfFindClumps(), gfClump::hitCount, and lm.

Referenced by gfFindAlignAaTrans(), gfTransTransFindClumps(), and transQuery().

01899 {
01900 int frame;
01901 int oneHit;
01902 int hitCount = 0;
01903 for (frame = 0; frame < 3; ++frame)
01904     {
01905     clumps[frame] = gfFindClumps(gfs[frame], seq, lm, &oneHit);
01906     hitCount += oneHit;
01907     }
01908 *retHitCount = hitCount;
01909 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gfTransTransFindClumps ( struct genoFind gfs[3],
aaSeq seqs[3],
struct gfClump clumps[3][3],
struct lm lm,
int *  retHitCount 
)

Definition at line 1911 of file genoFind.c.

References gfTransFindClumps(), gfClump::hitCount, and lm.

Referenced by gfTransTransFindBundles(), and transTransQuery().

01915 {
01916 int qFrame;
01917 int oneHit;
01918 int hitCount = 0;
01919 
01920 for (qFrame = 0; qFrame<3; ++qFrame)
01921     {
01922     gfTransFindClumps(gfs, seqs[qFrame], clumps[qFrame], lm, &oneHit);
01923     hitCount += oneHit;
01924     }
01925 *retHitCount = hitCount;
01926 }

Here is the call graph for this function:

Here is the caller graph for this function:


Generated on Tue Dec 25 18:58:22 2007 for blat by  doxygen 1.5.2