#include "dnaseq.h"
#include "fuzzyFind.h"
#include "hash.h"
#include "localmem.h"
#include "axt.h"
Include dependency graph for genoFind.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
Data Structures | |
| struct | gfSeqSource |
| struct | gfHit |
| struct | gfClump |
| struct | genoFind |
| struct | gfOutput |
Defines | |
| #define | gfVersion "34" |
Enumerations | |
| enum | gfConstants { gfMinMatch = 2, gfMaxGap = 2, gfTileSize = 11, gfMaxTileUse = 1024, gfPepMaxTileUse = 30000 } |
Functions | |
| void | gfClumpFree (struct gfClump **pClump) |
| void | gfClumpFreeList (struct gfClump **pList) |
| void | genoFindFree (struct genoFind **pGenoFind) |
| gfSeqSource * | gfFindNamedSource (struct genoFind *gf, char *name) |
| gfOutput * | gfOutputAny (char *format, int goodPpt, boolean qIsProt, boolean tIsProt, boolean noHead, char *databaseName, int databaseSeqCount, double databaseLetters, double minIdentity, FILE *f) |
| gfOutput * | gfOutputPsl (int goodPpt, boolean qIsProt, boolean tIsProt, FILE *f, boolean saveSeq, boolean noHead) |
| gfOutput * | gfOutputAxt (int goodPpt, boolean qIsProt, boolean tIsProt, FILE *f) |
| gfOutput * | gfOutputAxtMem (int goodPpt, boolean qIsProt, boolean tIsProt) |
| gfOutput * | gfOutputBlast (int goodPpt, boolean qIsProt, boolean tIsProt, char *databaseName, int databaseSeqCount, double databaseLetters, char *blastType, double minIdentity, FILE *f) |
| void | gfOutputQuery (struct gfOutput *out, FILE *f) |
| void | gfOutputHead (struct gfOutput *out, FILE *f) |
| void | gfOutputFree (struct gfOutput **pOut) |
| void | gfCheckTileSize (int tileSize, boolean isPep) |
| genoFind * | gfIndexSeq (bioSeq *seqList, int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean isPep, boolean allowOneMismatch, boolean maskUpper, int stepSize) |
| genoFind * | gfIndexNibsAndTwoBits (int fileCount, char *fileNames[], int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean allowOneMismatch, int stepSize) |
| void | gfIndexTransNibsAndTwoBits (struct genoFind *transGf[2][3], int fileCount, char *fileNames[], int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean allowOneMismatch, boolean mask, int stepSize) |
| gfClump * | gfFindClumps (struct genoFind *gf, struct dnaSeq *seq, struct lm *lm, int *retHitCount) |
| gfClump * | gfFindClumpsWithQmask (struct genoFind *gf, bioSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount) |
| gfHit * | gfFindHitsInRegion (struct genoFind *gf, bioSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, struct gfSeqSource *target, int tMin, int tMax) |
| void | gfTransFindClumps (struct genoFind *gfs[3], aaSeq *seq, struct gfClump *clumps[3], struct lm *lm, int *retHitCount) |
| void | gfTransTransFindClumps (struct genoFind *gfs[3], aaSeq *seqs[3], struct gfClump *clumps[3][3], struct lm *lm, int *retHitCount) |
| void | gfClumpDump (struct genoFind *gf, struct gfClump *clump, FILE *f) |
| void | gfAlignAaClumps (struct genoFind *gf, struct gfClump *clumpList, aaSeq *seq, boolean isRc, int minMatch, struct gfOutput *out) |
| void | gfFindAlignAaTrans (struct genoFind *gfs[3], aaSeq *qSeq, struct hash *t3Hash, boolean tIsRc, int minMatch, struct gfOutput *out) |
| char * | gfSignature () |
| void | gfCatchPipes () |
| int | gfReadMulti (int sd, void *vBuf, size_t size) |
| hash * | gfFileCacheNew () |
| void | gfFileCacheFree (struct hash **pCache) |
| void | gfAlignStrand (int *pConn, char *nibDir, struct dnaSeq *seq, boolean isRc, int minMatch, struct hash *tFileCache, struct gfOutput *out) |
| void | gfAlignTrans (int *pConn, char *nibDir, aaSeq *seq, int minMatch, struct hash *tFileHash, struct gfOutput *out) |
| void | gfAlignTransTrans (int *pConn, char *nibDir, struct dnaSeq *seq, boolean qIsRc, int minMatch, struct hash *tFileCache, struct gfOutput *out, boolean isRna) |
| int | gfConnect (char *hostName, char *portName) |
| void | gfMakeOoc (char *outName, char *files[], int fileCount, int tileSize, bits32 maxPat, enum gfType tType) |
| void | gfLongDnaInMem (struct dnaSeq *query, struct genoFind *gf, boolean isRc, int minScore, Bits *qMaskBits, struct gfOutput *out, boolean fastMap, boolean band) |
| void | gfLongTransTransInMem (struct dnaSeq *query, struct genoFind *gfs[3], struct hash *t3Hash, boolean qIsRc, boolean tIsRc, boolean qIsRna, int minScore, struct gfOutput *out) |
| gfClump * | gfPcrClumps (struct genoFind *gf, char *fPrimer, int fPrimerSize, char *rPrimer, int rPrimerSize, int minDistance, int maxDistance) |
| #define gfVersion "34" |
| enum gfConstants |
Definition at line 36 of file genoFind.h.
00036 { 00037 gfMinMatch = 2, 00038 gfMaxGap = 2, 00039 gfTileSize = 11, 00040 gfMaxTileUse = 1024, 00041 gfPepMaxTileUse = 30000, 00042 };
| void genoFindFree | ( | struct genoFind ** | pGenoFind | ) |
Definition at line 67 of file genoFind.c.
References genoFind::allocated, bitFree(), freeMem(), freez(), genoFind::lists, genoFind::listSizes, gfSeqSource::maskedBits, genoFind::sourceCount, and genoFind::sources.
Referenced by bigBlat(), genoFindDirect(), genoPcrDirect(), and gfMakeOoc().
00069 { 00070 struct genoFind *gf = *pGenoFind; 00071 int i; 00072 struct gfSeqSource *sources; 00073 if (gf != NULL) 00074 { 00075 freeMem(gf->lists); 00076 freeMem(gf->listSizes); 00077 freeMem(gf->allocated); 00078 if ((sources = gf->sources) != NULL) 00079 { 00080 for (i=0; i<gf->sourceCount; ++i) 00081 bitFree(&sources[i].maskedBits); 00082 freeMem(sources); 00083 } 00084 freez(pGenoFind); 00085 } 00086 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfAlignAaClumps | ( | struct genoFind * | gf, | |
| struct gfClump * | clumpList, | |||
| aaSeq * | seq, | |||
| boolean | isRc, | |||
| int | minMatch, | |||
| struct gfOutput * | out | |||
| ) |
Definition at line 913 of file gfBlatLib.c.
References ffTight, gfAlignSomeClumps(), and TRUE.
Referenced by searchOneProt().
00916 { 00917 gfAlignSomeClumps(gf, clumpList, seq, isRc, minMatch, out, TRUE, ffTight); 00918 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfAlignStrand | ( | int * | pConn, | |
| char * | nibDir, | |||
| struct dnaSeq * | seq, | |||
| boolean | isRc, | |||
| int | minMatch, | |||
| struct hash * | tFileCache, | |||
| struct gfOutput * | out | |||
| ) |
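Definition at line 552 of file gfBlatLib.c.
Note: the listing below refers to tSeqDir, which does not appear in the prototype above; it presumably corresponds to the nibDir parameter (the prototype and the definition appear to use different parameter names).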
References alignComponents(), AllocVar, FALSE, ffCdna, ffIntronMax, freeDnaSeq(), getTargetName(), gfiExpandAndLoadCached(), gfQuerySeq(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfOutput::includeTargetFile, range::next, PATH_LEN, rangeList, saveAlignments(), dnaSeq::size, slSort(), ssAliCount, ssBundleFree(), and ssStitch().
Referenced by doBlat(), and gfClient().
00557 { 00558 struct ssBundle *bun; 00559 struct gfRange *rangeList = NULL, *range; 00560 struct dnaSeq *targetSeq; 00561 char targetName[PATH_LEN]; 00562 00563 rangeList = gfQuerySeq(*pConn, seq); 00564 close(*pConn); 00565 *pConn = -1; 00566 slSort(&rangeList, gfRangeCmpTarget); 00567 /* Dump range list to file for debugging. */ 00568 rangeList = gfRangesBundle(rangeList, ffIntronMax); 00569 for (range = rangeList; range != NULL; range = range->next) 00570 { 00571 getTargetName(range->tName, out->includeTargetFile, targetName); 00572 targetSeq = gfiExpandAndLoadCached(range, tFileCache, tSeqDir, 00573 seq->size, &range->tTotalSize, FALSE, FALSE, usualExpansion); 00574 AllocVar(bun); 00575 bun->qSeq = seq; 00576 bun->genoSeq = targetSeq; 00577 alignComponents(range, bun, ffCdna); 00578 ssStitch(bun, ffCdna, minMatch, ssAliCount); 00579 saveAlignments(targetName, range->tTotalSize, range->tStart, 00580 bun, NULL, isRc, FALSE, ffCdna, minMatch, out); 00581 ssBundleFree(&bun); 00582 freeDnaSeq(&targetSeq); 00583 } 00584 gfRangeFreeList(&rangeList); 00585 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfAlignTrans | ( | int * | pConn, | |
| char * | nibDir, | |||
| aaSeq * | seq, | |||
| int | minMatch, | |||
| struct hash * | tFileHash, | |||
| struct gfOutput * | out | |||
| ) |
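Definition at line 1042 of file gfBlatLib.c.
Note: the listing below refers to tSeqDir and tFileCache, which do not appear in the prototype above; they presumably correspond to the nibDir and tFileHash parameters (the prototype and the definition appear to use different parameter names).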
References AllocVar, clumpTargetName(), clumpToHspRange(), trans3::end, FALSE, ffCdna, ffIntronMax, ssBundle::ffList, freeDnaSeqList(), freeHash(), freeMem(), ssBundle::genoSeq, getTargetName(), gfClumpFreeList(), gfQuerySeqTrans(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), hashMustFindVal(), gfOutput::includeTargetFile, ssBundle::isProt, lm, lmCleanup(), lmInit(), loadHashT3Ranges(), gfSeqSource::next, slRef::next, gfClump::next, trans3::nibSize, PATH_LEN, ssBundle::qSeq, rangeCoorTimes3(), rangeList, saveAlignments(), trans3::seq, seqClumpToRangeList(), dnaSeq::size, slCat(), slFreeList(), slReverse(), slSort(), ss, ssAliCount, ssBundleFree(), ssStitch(), trans3::start, ssBundle::t3List, gfClump::target, gfClump::tEnd, tileSize, trans3::trans, trans3Find(), trans3Free(), TRUE, and gfClump::tStart.
Referenced by doBlat(), and gfClient().
01047 { 01048 struct ssBundle *bun; 01049 struct gfClump *clumps[2][3], *clump; 01050 struct gfRange *rangeList = NULL, *range, *rl; 01051 struct dnaSeq *targetSeq, *tSeqList = NULL; 01052 char targetName[PATH_LEN]; 01053 int tileSize; 01054 int frame, isRc = 0; 01055 struct hash *t3Hash = NULL; 01056 struct slRef *t3RefList = NULL, *ref; 01057 struct gfSeqSource *ssList = NULL, *ss; 01058 struct trans3 *t3; 01059 struct lm *lm = lmInit(0); 01060 01061 /* Get clumps from server. */ 01062 gfQuerySeqTrans(*pConn, seq, clumps, lm, &ssList, &tileSize); 01063 close(*pConn); 01064 *pConn = -1; 01065 01066 for (isRc = 0; isRc <= 1; ++isRc) 01067 { 01068 /* Figure out which parts of sequence we need to load. */ 01069 for (frame = 0; frame < 3; ++frame) 01070 { 01071 rl = seqClumpToRangeList(clumps[isRc][frame], frame); 01072 rangeList = slCat(rangeList, rl); 01073 } 01074 /* Convert from amino acid to nucleotide coordinates. */ 01075 rangeCoorTimes3(rangeList); 01076 slSort(&rangeList, gfRangeCmpTarget); 01077 rangeList = gfRangesBundle(rangeList, ffIntronMax); 01078 loadHashT3Ranges(rangeList, tSeqDir, tFileCache, seq->size, 01079 isRc, &t3Hash, &tSeqList, &t3RefList); 01080 01081 /* The old range list was not very precise - it was just to get 01082 * the DNA loaded. */ 01083 gfRangeFreeList(&rangeList); 01084 01085 01086 /* Patch up clump list and associated sequence source to refer 01087 * to bits of genome loaded into memory. Create new range list 01088 * by extending hits in clumps. 
*/ 01089 for (frame = 0; frame < 3; ++frame) 01090 { 01091 for (clump = clumps[isRc][frame]; clump != NULL; clump = clump->next) 01092 { 01093 struct gfSeqSource *ss = clump->target; 01094 t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3); 01095 ss->seq = t3->trans[frame]; 01096 ss->start = t3->start/3; 01097 ss->end = t3->end/3; 01098 clumpToHspRange(clump, seq, tileSize, frame, t3, &rangeList, TRUE, FALSE); 01099 } 01100 } 01101 slReverse(&rangeList); 01102 slSort(&rangeList, gfRangeCmpTarget); 01103 rangeList = gfRangesBundle(rangeList, ffIntronMax/3); 01104 01105 /* Do detailed alignment of each of the clustered ranges. */ 01106 for (range = rangeList; range != NULL; range = range->next) 01107 { 01108 targetSeq = range->tSeq; 01109 AllocVar(bun); 01110 bun->qSeq = seq; 01111 bun->genoSeq = targetSeq; 01112 bun->ffList = gfRangesToFfItem(range->components, seq); 01113 bun->isProt = TRUE; 01114 t3 = hashMustFindVal(t3Hash, range->tName); 01115 bun->t3List = t3; 01116 ssStitch(bun, ffCdna, minMatch, ssAliCount); 01117 getTargetName(range->tName, out->includeTargetFile, targetName); 01118 saveAlignments(targetName, t3->nibSize, 0, 01119 bun, t3Hash, FALSE, isRc, ffCdna, minMatch, out); 01120 ssBundleFree(&bun); 01121 } 01122 01123 /* Cleanup for this strand of database. */ 01124 gfRangeFreeList(&rangeList); 01125 freeHash(&t3Hash); 01126 for (ref = t3RefList; ref != NULL; ref = ref->next) 01127 { 01128 struct trans3 *t3 = ref->val; 01129 trans3Free(&t3); 01130 } 01131 slFreeList(&t3RefList); 01132 freeDnaSeqList(&tSeqList); 01133 } 01134 01135 /* Final cleanup. */ 01136 for (isRc=0; isRc<=1; ++isRc) 01137 for (frame=0; frame<3; ++frame) 01138 gfClumpFreeList(&clumps[isRc][frame]); 01139 for (ss = ssList; ss != NULL; ss = ss->next) 01140 freeMem(ss->fileName); 01141 slFreeList(&ssList); 01142 lmCleanup(&lm); 01143 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfAlignTransTrans | ( | int * | pConn, | |
| char * | nibDir, | |||
| struct dnaSeq * | seq, | |||
| boolean | qIsRc, | |||
| int | minMatch, | |||
| struct hash * | tFileCache, | |||
| struct gfOutput * | out, | |||
| boolean | isRna | |||
| ) |
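Definition at line 1163 of file gfBlatLib.c.
Note: the listing below refers to tSeqDir and qSeq, which do not appear in the prototype above; they presumably correspond to the nibDir and seq parameters (the prototype and the definition appear to use different parameter names).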
References AllocVar, clumpTargetName(), clumpToHspRange(), FALSE, ffCdna, ffIntronMax, ffLoose, freeDnaSeqList(), freeHash(), freeMem(), getTargetName(), gfQuerySeqTransTrans(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), gfOutput::includeTargetFile, lm, lmCleanup(), lmInit(), loadHashT3Ranges(), gfSeqSource::next, slRef::next, gfClump::next, PATH_LEN, rangeCoorTimes3(), rangeList, saveAlignments(), trans3::seq, seqClumpToRangeList(), dnaSeq::size, slCat(), slFreeList(), slReverse(), slSort(), ss, ssAliCount, ssBundleFree(), ssStitch(), gfClump::target, gfClump::tEnd, tileSize, trans3::trans, trans3Find(), trans3Free(), trans3New(), TRUE, gfClump::tStart, and untranslateRangeList().
Referenced by doBlat(), and gfClient().
01170 { 01171 struct gfClump *clumps[2][3][3], *clump; 01172 char targetName[PATH_LEN]; 01173 int qFrame, tFrame, tIsRc; 01174 struct gfSeqSource *ssList = NULL, *ss; 01175 struct lm *lm = lmInit(0); 01176 int tileSize; 01177 struct gfRange *rangeList = NULL, *rl, *range; 01178 struct trans3 *qTrans = trans3New(qSeq), *t3; 01179 struct slRef *t3RefList = NULL, *t3Ref; 01180 struct hash *t3Hash = NULL; 01181 struct dnaSeq *tSeqList = NULL; 01182 enum ffStringency stringency = (isRna ? ffCdna : ffLoose); 01183 01184 /* Query server for clumps. */ 01185 gfQuerySeqTransTrans(*pConn, qSeq, clumps, lm, &ssList, &tileSize); 01186 close(*pConn); 01187 *pConn = -1; 01188 01189 for (tIsRc=0; tIsRc <= 1; ++tIsRc) 01190 { 01191 /* Figure out which ranges need to be loaded and load them. */ 01192 for (qFrame = 0; qFrame < 3; ++qFrame) 01193 { 01194 for (tFrame = 0; tFrame < 3; ++tFrame) 01195 { 01196 rl = seqClumpToRangeList(clumps[tIsRc][qFrame][tFrame], tFrame); 01197 rangeList = slCat(rangeList, rl); 01198 } 01199 } 01200 rangeCoorTimes3(rangeList); 01201 slSort(&rangeList, gfRangeCmpTarget); 01202 rangeList = gfRangesBundle(rangeList, ffIntronMax); 01203 loadHashT3Ranges(rangeList, tSeqDir, tFileCache, 01204 qSeq->size/3, tIsRc, &t3Hash, &tSeqList, &t3RefList); 01205 01206 /* The old range list was not very precise - it was just to get 01207 * the DNA loaded. */ 01208 gfRangeFreeList(&rangeList); 01209 01210 /* Patch up clump list and associated sequence source to refer 01211 * to bits of genome loaded into memory. Create new range list 01212 * by extending hits in clumps. 
*/ 01213 for (qFrame = 0; qFrame < 3; ++qFrame) 01214 { 01215 for (tFrame = 0; tFrame < 3; ++tFrame) 01216 { 01217 for (clump = clumps[tIsRc][qFrame][tFrame]; clump != NULL; clump = clump->next) 01218 { 01219 struct gfSeqSource *ss = clump->target; 01220 struct gfRange *rangeSet = NULL; 01221 t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3); 01222 ss->seq = t3->trans[tFrame]; 01223 ss->start = t3->start/3; 01224 ss->end = t3->end/3; 01225 clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, t3, &rangeSet, TRUE, FALSE); 01226 untranslateRangeList(rangeSet, qFrame, tFrame, NULL, t3, t3->start); 01227 rangeList = slCat(rangeSet, rangeList); 01228 } 01229 } 01230 } 01231 slReverse(&rangeList); 01232 slSort(&rangeList, gfRangeCmpTarget); 01233 rangeList = gfRangesBundle(rangeList, ffIntronMax); 01234 01235 for (range = rangeList; range != NULL; range = range->next) 01236 { 01237 struct dnaSeq *targetSeq = range->tSeq; 01238 struct ssBundle *bun; 01239 01240 AllocVar(bun); 01241 bun->qSeq = qSeq; 01242 bun->genoSeq = targetSeq; 01243 bun->ffList = gfRangesToFfItem(range->components, qSeq); 01244 ssStitch(bun, stringency, minMatch, ssAliCount); 01245 getTargetName(range->tName, out->includeTargetFile, targetName); 01246 t3 = range->t3; 01247 saveAlignments(targetName, t3->nibSize, t3->start, 01248 bun, NULL, qIsRc, tIsRc, stringency, minMatch, out); 01249 ssBundleFree(&bun); 01250 } 01251 01252 /* Cleanup for this strand of database. */ 01253 gfRangeFreeList(&rangeList); 01254 freeHash(&t3Hash); 01255 for (t3Ref = t3RefList; t3Ref != NULL; t3Ref = t3Ref->next) 01256 { 01257 struct trans3 *t3 = t3Ref->val; 01258 trans3Free(&t3); 01259 } 01260 slFreeList(&t3RefList); 01261 freeDnaSeqList(&tSeqList); 01262 } 01263 trans3Free(&qTrans); 01264 for (ss = ssList; ss != NULL; ss = ss->next) 01265 freeMem(ss->fileName); 01266 slFreeList(&ssList); 01267 lmCleanup(&lm); 01268 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfCatchPipes | ( | ) |
Definition at line 38 of file genoFind.c.
References gfPipeHandler().
Referenced by main().
00040 { 00041 signal(SIGPIPE, gfPipeHandler); 00042 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfCheckTileSize | ( | int | tileSize, | |
| boolean | isPep | |||
| ) |
Definition at line 97 of file genoFind.c.
References errAbort().
Referenced by gfNewEmpty(), and main().
00099 { 00100 if (isPep) 00101 { 00102 if (tileSize < 3 || tileSize > 8) 00103 errAbort("protein tileSize must be between 3 and 8"); 00104 } 00105 else 00106 { 00107 if (tileSize < 6 || tileSize > 18) 00108 errAbort("DNA tileSize must be between 6 and 18"); 00109 } 00110 }
Here is the call graph for this function:

Here is the caller graph for this function:

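| void gfClumpDump | ( | struct genoFind * | gf, | |
| struct gfClump * | clump, | |||
| FILE * | f | |||
| ) |
Definition at line 973 of file genoFind.c.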
References gfClump::hitCount, gfClump::hitList, name, gfHit::next, gfClump::qEnd, gfClump::qStart, ss, gfClump::target, gfClump::tEnd, and gfClump::tStart.
Referenced by genoFindDirect(), and genoPcrDirect().
00975 { 00976 struct gfSeqSource *ss = clump->target; 00977 char *name = ss->fileName; 00978 00979 if (name == NULL) name = ss->seq->name; 00980 fprintf(f, "%u-%u %s %u-%u, hits %d\n", 00981 clump->qStart, clump->qEnd, name, 00982 clump->tStart - ss->start, clump->tEnd - ss->start, 00983 clump->hitCount); 00984 #ifdef SOMETIMES 00985 for (hit = clump->hitList; hit != NULL; hit = hit->next) 00986 fprintf(f, " q %d, t %d, diag %d\n", hit->qStart, hit->tStart, hit->diagonal); 00987 #endif 00988 }
Here is the caller graph for this function:

| void gfClumpFree | ( | struct gfClump ** | pClump | ) |
Definition at line 951 of file genoFind.c.
References freez().
Referenced by clumpNear(), gfClumpFreeList(), and targetClump().
00953 { 00954 struct gfClump *clump; 00955 if ((clump = *pClump) == NULL) 00956 return; 00957 freez(pClump); 00958 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfClumpFreeList | ( | struct gfClump ** | pList | ) |
Definition at line 960 of file genoFind.c.
References gfClumpFree(), and gfClump::next.
Referenced by dnaQuery(), ffSeedExtInMem(), genoFindDirect(), gfAlignTrans(), gfFindAlignAaTrans(), gfTransTransFindBundles(), pcrQuery(), searchOneProt(), transQuery(), and transTransQuery().
00962 { 00963 struct gfClump *el, *next; 00964 00965 for (el = *pList; el != NULL; el = next) 00966 { 00967 next = el->next; 00968 gfClumpFree(&el); 00969 } 00970 *pList = NULL; 00971 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int gfConnect | ( | char * | hostName, | |
| char * | portName | |||
| ) |
Definition at line 10 of file gfNet.c.
References errnoAbort(), and netConnect().
Referenced by doBlat(), gfClient(), and gfPcrGetRanges().
00012 { 00013 /* Connect to server. */ 00014 int sd = netConnect(hostName, atoi(portName)); 00015 if (sd < 0) 00016 { 00017 errnoAbort("Sorry, the BLAT/iPCR server seems to be down. Please try " 00018 "again later."); 00019 } 00020 return sd; 00021 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfFileCacheFree | ( | struct hash ** | pCache | ) |
Definition at line 526 of file gfBlatLib.c.
References gfFileCacheFreeEl(), hashFree, and hashTraverseEls().
Referenced by gfClient(), and gfPcrViaNet().
00528 { 00529 struct hash *cache = *pCache; 00530 if (cache != NULL) 00531 { 00532 hashTraverseEls(cache, gfFileCacheFreeEl); 00533 hashFree(pCache); 00534 } 00535 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct hash* gfFileCacheNew | ( | ) | [read] |
Definition at line 503 of file gfBlatLib.c.
References hashNew.
Referenced by doBlat(), gfClient(), and gfPcrViaNet().
00505 { 00506 return hashNew(0); 00507 }
Here is the caller graph for this function:

| void gfFindAlignAaTrans | ( | struct genoFind * | gfs[3], | |
| aaSeq * | qSeq, | |||
| struct hash * | t3Hash, | |||
| boolean | tIsRc, | |||
| int | minMatch, | |||
| struct gfOutput * | out | |||
| ) |
Definition at line 938 of file gfBlatLib.c.
References AllocVar, clumpToHspRange(), FALSE, ffCdna, ffIntronMax, ssBundle::ffList, ssBundle::genoSeq, gfClumpFreeList(), gfRangeCmpTarget(), gfRangeFreeList(), gfRangesBundle(), gfRangesToFfItem(), gfTransFindClumps(), hashMustFindVal(), ssBundle::isProt, lm, lmCleanup(), lmInit(), dnaSeq::name, gfClump::next, ssBundle::qSeq, rangeList, saveAlignments(), trans3::seq, dnaSeq::size, slReverse(), slSort(), ssAliCount, ssBundleFree(), ssStitch(), ssBundle::t3List, genoFind::tileSize, tileSize, and TRUE.
Referenced by tripleSearch().
00942 { 00943 struct gfClump *clumps[3]; 00944 int frame; 00945 struct gfClump *clump; 00946 struct gfRange *rangeList = NULL, *range; 00947 aaSeq *targetSeq; 00948 struct ssBundle *bun; 00949 int tileSize = gfs[0]->tileSize; 00950 struct trans3 *t3; 00951 int hitCount; 00952 struct lm *lm = lmInit(0); 00953 00954 gfTransFindClumps(gfs, qSeq, clumps, lm, &hitCount); 00955 for (frame=0; frame<3; ++frame) 00956 { 00957 for (clump = clumps[frame]; clump != NULL; clump = clump->next) 00958 { 00959 clumpToHspRange(clump, qSeq, tileSize, frame, NULL, &rangeList, TRUE, FALSE); 00960 } 00961 } 00962 slReverse(&rangeList); 00963 slSort(&rangeList, gfRangeCmpTarget); 00964 rangeList = gfRangesBundle(rangeList, ffIntronMax/3); 00965 for (range = rangeList; range != NULL; range = range->next) 00966 { 00967 targetSeq = range->tSeq; 00968 t3 = hashMustFindVal(t3Hash, targetSeq->name); 00969 AllocVar(bun); 00970 bun->qSeq = qSeq; 00971 bun->genoSeq = targetSeq; 00972 bun->ffList = gfRangesToFfItem(range->components, qSeq); 00973 bun->isProt = TRUE; 00974 bun->t3List = t3; 00975 ssStitch(bun, ffCdna, minMatch, ssAliCount); 00976 saveAlignments(targetSeq->name, t3->seq->size, 0, 00977 bun, t3Hash, FALSE, tIsRc, ffCdna, minMatch, out); 00978 ssBundleFree(&bun); 00979 } 00980 gfRangeFreeList(&rangeList); 00981 for (frame=0; frame<3; ++frame) 00982 gfClumpFreeList(&clumps[frame]); 00983 lmCleanup(&lm); 00984 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfClump* gfFindClumps | ( | struct genoFind * | gf, | |
| struct dnaSeq * | seq, | |||
| struct lm * | lm, | |||
| int * | retHitCount | |||
| ) | [read] |
Definition at line 1890 of file genoFind.c.
References gfFindClumpsWithQmask(), and lm.
Referenced by dnaQuery(), genoFindDirect(), gfTransFindClumps(), and searchOneProt().
01892 { 01893 return gfFindClumpsWithQmask(gf, seq, NULL, 0, lm, retHitCount); 01894 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfClump* gfFindClumpsWithQmask | ( | struct genoFind * | gf, | |
| bioSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| int * | retHitCount | |||
| ) | [read] |
Definition at line 1849 of file genoFind.c.
References clumpHits(), cmpQuerySize, gfFindHitsWithQmask(), lm, genoFind::minMatch, minMatch, dnaSeq::size, and genoFind::tileSize.
Referenced by ffSeedExtInMem(), and gfFindClumps().
01853 { 01854 struct gfClump *clumpList = NULL; 01855 struct gfHit *hitList; 01856 int minMatch = gf->minMatch; 01857 01858 #ifdef OLD /* stepSize makes this obsolete. */ 01859 if (seq->size < gf->tileSize * (gf->minMatch+1)) 01860 minMatch = 1; 01861 #endif /* OLD */ 01862 01863 hitList = gfFindHitsWithQmask(gf, seq, qMaskBits, qMaskOffset, lm, 01864 retHitCount, NULL, 0, 0); 01865 cmpQuerySize = seq->size; 01866 clumpList = clumpHits(gf, hitList, minMatch); 01867 return clumpList; 01868 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfHit* gfFindHitsInRegion | ( | struct genoFind * | gf, | |
| bioSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| struct gfSeqSource * | target, | |||
| int | tMin, | |||
| int | tMax | |||
| ) | [read] |
Definition at line 1870 of file genoFind.c.
References gfFindHitsWithQmask(), lm, gfHit::next, gfSeqSource::start, and gfHit::tStart.
Referenced by scanIndexForSmallExons().
01877 { 01878 int targetStart; 01879 struct gfHit *hitList, *hit; 01880 int hitCount; 01881 01882 targetStart = target->start; 01883 hitList = gfFindHitsWithQmask(gf, seq, qMaskBits, qMaskOffset, lm, 01884 &hitCount, target, tMin + targetStart, tMax + targetStart); 01885 for (hit = hitList; hit != NULL; hit = hit->next) 01886 hit->tStart -= targetStart; 01887 return hitList; 01888 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfSeqSource* gfFindNamedSource | ( | struct genoFind * | gf, | |
| char * | name | |||
| ) | [read] |
Definition at line 2010 of file genoFind.c.
References gfSeqSource::fileName, dnaSeq::name, sameString, gfSeqSource::seq, genoFind::sourceCount, genoFind::sources, and splitPath().
Referenced by refineBundle().
02012 { 02013 struct gfSeqSource *source = gf->sources; 02014 int count = gf->sourceCount; 02015 02016 if (source->seq == NULL) /* Use first source to see if seq or file. */ 02017 { 02018 char rootName[256]; 02019 while (--count >= 0) 02020 { 02021 splitPath(source->fileName, NULL, rootName, NULL); 02022 if (sameString(name, rootName)) 02023 return source; 02024 } 02025 } 02026 else 02027 { 02028 while (--count >= 0) 02029 { 02030 if (sameString(source->seq->name, name)) 02031 return source; 02032 source += 1; 02033 } 02034 } 02035 return NULL; 02036 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct genoFind* gfIndexNibsAndTwoBits | ( | int | fileCount, | |
| char * | fileNames[], | |||
| int | minMatch, | |||
| int | maxGap, | |||
| int | tileSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | allowOneMismatch, | |||
| int | stepSize | |||
| ) | [read] |
Definition at line 514 of file genoFind.c.
References AllocArray, baseCount, bits32, cloneString(), dnaSeqFree, errAbort(), FALSE, gfAddSeq(), gfAddTilesInNib(), gfAllocLists(), gfCountTilesInNib(), gfCountTilesInTwoBit(), gfNewEmpty(), gfZeroNonOverused(), gfZeroOverused(), twoBitFile::indexList, maxTotalBases(), twoBitIndex::name, twoBitIndex::next, nibIsFile(), PATH_LEN, safef(), dnaSeq::size, genoFind::sourceCount, genoFind::sources, ss, genoFind::tileSize, genoFind::totalSeqSize, twoBitClose(), twoBitIsFile(), twoBitOpen(), and twoBitReadSeqFragLower().
Referenced by genoFindDirect(), genoPcrDirect(), and startServer().
00525 { 00526 struct genoFind *gf = gfNewEmpty(minMatch, maxGap, tileSize, stepSize, 00527 maxPat, oocFile, FALSE, allowOneMismatch); 00528 int i; 00529 bits32 offset = 0, nibSize; 00530 char *fileName; 00531 struct gfSeqSource *ss; 00532 long long totalBases = 0, warnAt = maxTotalBases(); 00533 int totalSeq = 0; 00534 00535 if (allowOneMismatch) 00536 errAbort("Don't currently support allowOneMismatch in gfIndexNibsAndTwoBits"); 00537 if (stepSize == 0) 00538 stepSize = gf->tileSize; 00539 for (i=0; i<fileCount; ++i) 00540 { 00541 fileName = fileNames[i]; 00542 if (twoBitIsFile(fileName)) 00543 { 00544 int seqCount; 00545 long long baseCount; 00546 gfCountTilesInTwoBit(gf, stepSize, fileName, &seqCount, &baseCount); 00547 totalBases += baseCount; 00548 totalSeq += seqCount; 00549 } 00550 else if (nibIsFile(fileName)) 00551 { 00552 totalBases += gfCountTilesInNib(gf, stepSize, fileName); 00553 totalSeq += 1; 00554 } 00555 else 00556 errAbort("Unrecognized file type %s", fileName); 00557 /* Warn if they exceed 4 gig. 
*/ 00558 if (totalBases >= warnAt) 00559 errAbort("Exceeding 4 billion bases, sorry gfServer can't handle that."); 00560 } 00561 gfAllocLists(gf); 00562 gfZeroNonOverused(gf); 00563 AllocArray(gf->sources, totalSeq); 00564 gf->sourceCount = totalSeq; 00565 ss = gf->sources; 00566 for (i=0; i<fileCount; ++i) 00567 { 00568 fileName = fileNames[i]; 00569 if (nibIsFile(fileName)) 00570 { 00571 nibSize = gfAddTilesInNib(gf, fileName, offset, stepSize); 00572 ss->fileName = fileName; 00573 ss->start = offset; 00574 offset += nibSize; 00575 ss->end = offset; 00576 ++ss; 00577 } 00578 else 00579 { 00580 struct twoBitFile *tbf = twoBitOpen(fileName); 00581 struct twoBitIndex *index; 00582 char nameBuf[PATH_LEN+256]; 00583 for (index = tbf->indexList; index != NULL; index = index->next) 00584 { 00585 struct dnaSeq *seq = twoBitReadSeqFragLower(tbf, index->name, 0,0); 00586 gfAddSeq(gf, seq, offset); 00587 safef(nameBuf, sizeof(nameBuf), "%s:%s", fileName, index->name); 00588 ss->fileName = cloneString(nameBuf); 00589 ss->start = offset; 00590 offset += seq->size; 00591 ss->end = offset; 00592 ++ss; 00593 dnaSeqFree(&seq); 00594 } 00595 twoBitClose(&tbf); 00596 } 00597 } 00598 gf->totalSeqSize = offset; 00599 gfZeroOverused(gf); 00600 printf("Done adding\n"); 00601 return gf; 00602 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct genoFind* gfIndexSeq | ( | bioSeq * | seqList, | |
| int | minMatch, | |||
| int | maxGap, | |||
| int | tileSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | isPep, | |||
| boolean | allowOneMismatch, | |||
| boolean | maskUpper, | |||
| int | stepSize | |||
| ) | [read] |
Definition at line 907 of file genoFind.c.
References gfLargeIndexSeq(), gfNewEmpty(), gfSmallIndexSeq(), and genoFind::segSize.
Referenced by bigBlat(), and blat().
00913 { 00914 struct genoFind *gf = gfNewEmpty(minMatch, maxGap, tileSize, stepSize, maxPat, 00915 oocFile, isPep, allowOneMismatch); 00916 if (stepSize == 0) 00917 stepSize = tileSize; 00918 if (gf->segSize > 0) 00919 { 00920 gfLargeIndexSeq(gf, seqList, minMatch, maxGap, tileSize, maxPat, oocFile, isPep, maskUpper); 00921 } 00922 else 00923 { 00924 gfSmallIndexSeq(gf, seqList, minMatch, maxGap, tileSize, maxPat, oocFile, isPep, maskUpper); 00925 } 00926 return gf; 00927 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfIndexTransNibsAndTwoBits | ( | struct genoFind * | transGf[2][3], | |
| int | fileCount, | |||
| char * | fileNames[], | |||
| int | minMatch, | |||
| int | maxGap, | |||
| int | tileSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | allowOneMismatch, | |||
| boolean | mask, | |||
| int | stepSize | |||
| ) |
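Definition at line 717 of file genoFind.c.
Note: the listing below refers to doMask, which does not appear in the prototype above; it presumably corresponds to the mask parameter (the prototype and the definition appear to use different parameter names).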
References AllocArray, bits32, errAbort(), freeDnaSeq(), gfAllocLists(), gfNewEmpty(), gfZeroNonOverused(), gfZeroOverused(), twoBitFile::indexList, maskSimplePepRepeat(), maxTotalBases(), twoBitIndex::name, twoBitIndex::next, nibIsFile(), PATH_LEN, readMaskedNib(), readMaskedTwoBit(), safef(), dnaSeq::size, genoFind::totalSeqSize, transCountBothStrands(), transIndexBothStrands(), TRUE, twoBitCheckTotalSize(), twoBitClose(), twoBitIsFile(), and twoBitOpen().
Referenced by startServer().
00722 { 00723 struct genoFind *gf; 00724 int i,isRc, frame; 00725 bits32 offset[2][3]; 00726 char *fileName; 00727 struct dnaSeq *seq; 00728 int sourceCount = 0; 00729 long long totalBases = 0, warnAt = maxTotalBases(); 00730 00731 if (allowOneMismatch) 00732 errAbort("Don't currently support allowOneMismatch in gfIndexTransNibsAndTwoBits"); 00733 /* Allocate indices for all reading frames. */ 00734 for (isRc=0; isRc <= 1; ++isRc) 00735 { 00736 for (frame = 0; frame < 3; ++frame) 00737 { 00738 transGf[isRc][frame] = gf = gfNewEmpty(minMatch, maxGap, 00739 tileSize, stepSize, maxPat, oocFile, TRUE, allowOneMismatch); 00740 } 00741 } 00742 00743 /* Mask simple AA repeats (of period 1 and 2). */ 00744 for (isRc = 0; isRc <= 1; ++isRc) 00745 for (frame = 0; frame < 3; ++frame) 00746 maskSimplePepRepeat(transGf[isRc][frame]); 00747 00748 /* Scan through .nib and .2bit files once counting tiles. */ 00749 for (i=0; i<fileCount; ++i) 00750 { 00751 fileName = fileNames[i]; 00752 printf("Counting %s\n", fileName); 00753 if (nibIsFile(fileName)) 00754 { 00755 seq = readMaskedNib(fileName, doMask); 00756 transCountBothStrands(seq, transGf); 00757 sourceCount += 1; 00758 totalBases += seq->size; 00759 freeDnaSeq(&seq); 00760 } 00761 else if (twoBitIsFile(fileName)) 00762 { 00763 struct twoBitFile *tbf = twoBitOpen(fileName); 00764 struct twoBitIndex *index; 00765 totalBases += twoBitCheckTotalSize(tbf); 00766 00767 for (index = tbf->indexList; index != NULL; index = index->next) 00768 { 00769 seq = readMaskedTwoBit(tbf, index->name, doMask); 00770 transCountBothStrands(seq, transGf); 00771 sourceCount += 1; 00772 freeDnaSeq(&seq); 00773 } 00774 twoBitClose(&tbf); 00775 } 00776 else 00777 errAbort("Unrecognized file type %s", fileName); 00778 if (totalBases >= warnAt) 00779 errAbort("Exceeding 4 billion bases, sorry gfServer can't handle that."); 00780 } 00781 00782 /* Get space for entries in indexed of all reading frames. 
*/ 00783 for (isRc=0; isRc <= 1; ++isRc) 00784 { 00785 for (frame = 0; frame < 3; ++frame) 00786 { 00787 gf = transGf[isRc][frame]; 00788 gfAllocLists(gf); 00789 gfZeroNonOverused(gf); 00790 AllocArray(gf->sources, sourceCount); 00791 gf->sourceCount = sourceCount; 00792 offset[isRc][frame] = 0; 00793 } 00794 } 00795 00796 /* Scan through nibs a second time building index. */ 00797 sourceCount = 0; 00798 for (i=0; i<fileCount; ++i) 00799 { 00800 fileName = fileNames[i]; 00801 printf("Indexing %s\n", fileName); 00802 if (nibIsFile(fileName)) 00803 { 00804 seq = readMaskedNib(fileName, doMask); 00805 transIndexBothStrands(seq, transGf, offset, sourceCount, fileName); 00806 freeDnaSeq(&seq); 00807 sourceCount += 1; 00808 } 00809 else /* .2bit file */ 00810 { 00811 struct twoBitFile *tbf = twoBitOpen(fileName); 00812 struct twoBitIndex *index; 00813 for (index = tbf->indexList; index != NULL; index = index->next) 00814 { 00815 char nameBuf[PATH_LEN+256]; 00816 safef(nameBuf, sizeof(nameBuf), "%s:%s", fileName, index->name); 00817 seq = readMaskedTwoBit(tbf, index->name, doMask); 00818 transIndexBothStrands(seq, transGf, offset, sourceCount, nameBuf); 00819 sourceCount += 1; 00820 freeDnaSeq(&seq); 00821 } 00822 twoBitClose(&tbf); 00823 } 00824 } 00825 00826 for (isRc=0; isRc <= 1; ++isRc) 00827 { 00828 for (frame = 0; frame < 3; ++frame) 00829 { 00830 gf = transGf[isRc][frame]; 00831 gf->totalSeqSize = offset[isRc][frame]; 00832 gfZeroOverused(gf); 00833 } 00834 } 00835 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfLongDnaInMem | ( | struct dnaSeq * | query, | |
| struct genoFind * | gf, | |||
| boolean | isRc, | |||
| int | minScore, | |||
| Bits * | qMaskBits, | |||
| struct gfOutput * | out, | |||
| boolean | fastMap, | |||
| boolean | band | |||
| ) |
Definition at line 1478 of file gfBlatLib.c.
References lmInit(), newHash(), rangeList, and dnaSeq::size.
Referenced by searchOneStrand().
01483 { 01484 int hitCount; 01485 int maxSize = 5000; 01486 int preferredSize = 4500; 01487 int overlapSize = 250; 01488 struct dnaSeq subQuery = *query; 01489 struct lm *lm = lmInit(0); 01490 int subOffset, subSize, nextOffset; 01491 DNA saveEnd, *endPos; 01492 struct ssBundle *oneBunList = NULL, *bigBunList = NULL, *bun; 01493 struct hash *bunHash = newHash(8); 01494 01495 for (subOffset = 0; subOffset<query->size; subOffset = nextOffset) 01496 { 01497 struct gfClump *clumpList; 01498 struct gfRange *rangeList = NULL; 01499 01500 /* Figure out size of this piece. If query is 01501 * maxSize or less do it all. Otherwise just 01502 * do preferred size, and set it up to overlap 01503 * with surrounding pieces by overlapSize. */ 01504 if (subOffset == 0 && query->size <= maxSize) 01505 nextOffset = subSize = query->size; 01506 else 01507 { 01508 subSize = preferredSize; 01509 if (subSize + subOffset >= query->size) 01510 { 01511 subSize = query->size - subOffset; 01512 nextOffset = query->size; 01513 } 01514 else 01515 { 01516 nextOffset = subOffset + preferredSize - overlapSize; 01517 } 01518 } 01519 subQuery.dna = query->dna + subOffset; 01520 subQuery.size = subSize; 01521 endPos = &subQuery.dna[subSize]; 01522 saveEnd = *endPos; 01523 *endPos = 0; 01524 if (band) 01525 { 01526 oneBunList = ffSeedExtInMem(gf, &subQuery, qMaskBits, subOffset, lm, minScore, isRc); 01527 } 01528 else 01529 { 01530 clumpList = gfFindClumpsWithQmask(gf, &subQuery, qMaskBits, subOffset, lm, &hitCount); 01531 if (fastMap) 01532 { 01533 oneBunList = fastMapClumpsToBundles(gf, clumpList, &subQuery); 01534 } 01535 else 01536 { 01537 oneBunList = gfClumpsToBundles(clumpList, isRc, &subQuery, minScore, &rangeList); 01538 gfRangeFreeList(&rangeList); 01539 } 01540 gfClumpFreeList(&clumpList); 01541 } 01542 addToBigBundleList(&oneBunList, bunHash, &bigBunList, query); 01543 *endPos = saveEnd; 01544 } 01545 #ifdef DEBUG 01546 dumpBunList(bigBunList); 01547 #endif /* DEBUG */ 01548 for (bun = 
bigBunList; bun != NULL; bun = bun->next) 01549 { 01550 ssStitch(bun, ffCdna, minScore, ssAliCount); 01551 if (!fastMap && !band) 01552 refineSmallExonsInBundle(bun); 01553 saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 01554 bun, NULL, isRc, FALSE, ffCdna, minScore, out); 01555 } 01556 ssBundleFreeList(&bigBunList); 01557 freeHash(&bunHash); 01558 lmCleanup(&lm); 01559 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfLongTransTransInMem | ( | struct dnaSeq * | query, | |
| struct genoFind * | gfs[3], | |||
| struct hash * | t3Hash, | |||
| boolean | qIsRc, | |||
| boolean | tIsRc, | |||
| boolean | qIsRna, | |||
| int | minScore, | |||
| struct gfOutput * | out | |||
| ) |
Definition at line 1562 of file gfBlatLib.c.
References ffCdna, ffLoose, newHash(), and dnaSeq::size.
Referenced by transTripleSearch().
01567 { 01568 enum ffStringency stringency = (qIsRna ? ffCdna : ffLoose); 01569 int maxSize = 1500; 01570 int preferredSize = 1200; /* PreferredSize - overlapSize might need to be multiple of 3. */ 01571 int overlapSize = 270; 01572 struct dnaSeq subQuery = *query; 01573 int subOffset, subSize, nextOffset; 01574 DNA saveEnd, *endPos; 01575 struct ssBundle *oneBunList = NULL, *bigBunList = NULL, *bun; 01576 struct hash *bunHash = newHash(8); 01577 01578 for (subOffset = 0; subOffset<query->size; subOffset = nextOffset) 01579 { 01580 /* Figure out size of this piece. If query is 01581 * maxSize or less do it all. Otherwise just 01582 * do preferred size, and set it up to overlap 01583 * with surrounding pieces by overlapSize. */ 01584 if (subOffset == 0 && query->size <= maxSize) 01585 nextOffset = subSize = query->size; 01586 else 01587 { 01588 subSize = preferredSize; 01589 if (subSize + subOffset >= query->size) 01590 { 01591 subSize = query->size - subOffset; 01592 nextOffset = query->size; 01593 } 01594 else 01595 { 01596 nextOffset = subOffset + preferredSize - overlapSize; 01597 } 01598 } 01599 subQuery.dna = query->dna + subOffset; 01600 subQuery.size = subSize; 01601 endPos = &subQuery.dna[subSize]; 01602 saveEnd = *endPos; 01603 *endPos = 0; 01604 oneBunList = gfTransTransFindBundles(gfs, &subQuery, t3Hash, qIsRc, minScore, qIsRna); 01605 addToBigBundleList(&oneBunList, bunHash, &bigBunList, query); 01606 *endPos = saveEnd; 01607 } 01608 for (bun = bigBunList; bun != NULL; bun = bun->next) 01609 { 01610 ssStitch(bun, ffCdna, minScore, ssAliCount); 01611 saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 01612 bun, NULL, qIsRc, tIsRc, stringency, minScore, out); 01613 } 01614 hashFree(&bunHash); 01615 ssBundleFreeList(&bigBunList); 01616 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfMakeOoc | ( | char * | outName, | |
| char * | files[], | |||
| int | fileCount, | |||
| int | tileSize, | |||
| bits32 | maxPat, | |||
| enum gfType | tType | |||
| ) |
Definition at line 1928 of file genoFind.c.
References bits32, carefulClose(), dnaSeq::dna, errAbort(), FALSE, faReadAllSeq(), freeDnaSeqList(), genoFindFree(), gfCountSeq(), gfMaxGap, gfMinMatch, gfNewEmpty(), gftDnaX, gftProt, gftRnaX, genoFind::listSizes, mustOpen(), dnaSeq::next, nibIsFile(), nibLoadAll(), oocSig, reverseComplement(), genoFind::segSize, dnaSeq::size, genoFind::tileSpaceSize, toLowerN(), trans3Free(), trans3New(), twoBitIsFile(), twoBitLoadAll(), and writeOne.
Referenced by blat().
01931 { 01932 boolean dbIsPep = (tType == gftProt || tType == gftDnaX || tType == gftRnaX); 01933 struct genoFind *gf = gfNewEmpty(gfMinMatch, gfMaxGap, tileSize, tileSize, 01934 maxPat, NULL, dbIsPep, FALSE); 01935 bits32 *sizes = gf->listSizes; 01936 int tileSpaceSize = gf->tileSpaceSize; 01937 bioSeq *seq, *seqList; 01938 bits32 sig = oocSig, psz = tileSize; 01939 bits32 i; 01940 int oocCount = 0; 01941 char *inName; 01942 FILE *f = mustOpen(outName, "w"); 01943 01944 if (gf->segSize > 0) 01945 errAbort("Don't yet know how to make ooc files for large tile sizes."); 01946 for (i=0; i<fileCount; ++i) 01947 { 01948 inName = files[i]; 01949 printf("Loading %s\n", inName); 01950 if (nibIsFile(inName)) 01951 { 01952 seqList = nibLoadAll(inName); 01953 } 01954 else if (twoBitIsFile(inName)) 01955 { 01956 seqList = twoBitLoadAll(inName); 01957 for (seq = seqList; seq != NULL; seq = seq->next) 01958 toLowerN(seq->dna, seq->size); 01959 } 01960 else 01961 { 01962 seqList = faReadAllSeq(inName, tType != gftProt); 01963 } 01964 printf("Counting %s\n", inName); 01965 for (seq = seqList; seq != NULL; seq = seq->next) 01966 { 01967 int isRc; 01968 for (isRc = 0; isRc <= 1; ++isRc) 01969 { 01970 if (tType == gftDnaX || tType == gftRnaX) 01971 { 01972 struct trans3 *t3 = trans3New(seq); 01973 int frame; 01974 for (frame=0; frame<3; ++frame) 01975 { 01976 gfCountSeq(gf, t3->trans[frame]); 01977 } 01978 trans3Free(&t3); 01979 } 01980 else 01981 { 01982 gfCountSeq(gf, seq); 01983 } 01984 if (tType == gftProt || tType == gftRnaX) 01985 break; 01986 else 01987 { 01988 reverseComplement(seq->dna, seq->size); 01989 } 01990 } 01991 } 01992 freeDnaSeqList(&seqList); 01993 } 01994 printf("Writing %s\n", outName); 01995 writeOne(f, sig); 01996 writeOne(f, psz); 01997 for (i=0; i<tileSpaceSize; ++i) 01998 { 01999 if (sizes[i] >= maxPat) 02000 { 02001 writeOne(f, i); 02002 ++oocCount; 02003 } 02004 } 02005 carefulClose(&f); 02006 genoFindFree(&gf); 02007 printf("Wrote %d overused %d-mers to 
%s\n", oocCount, tileSize, outName); 02008 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfOutput* gfOutputAny | ( | char * | format, | |
| int | goodPpt, | |||
| boolean | qIsProt, | |||
| boolean | tIsProt, | |||
| boolean | noHead, | |||
| char * | databaseName, | |||
| int | databaseSeqCount, | |||
| double | databaseLetters, | |||
| double | minIdentity, | |||
| FILE * | f | |||
| ) | [read] |
Definition at line 508 of file gfOut.c.
References ArraySize, errAbort(), FALSE, gfOutputAxt(), gfOutputBlast(), gfOutputMaf(), gfOutputPsl(), gfOutputSim4(), gfOutput::out, sameWord, stringArrayIx(), and TRUE.
Referenced by blat(), and gfClient().
00515 * Parameters: 00516 * format - either 'psl', 'pslx', 'sim4', 'blast', 'wublast', 'blast8', 'blast9', 'xml', 'axt', or 'maf' (NULL is treated as 'psl') 00517 * goodPpt - minimum identity of alignments to output in parts per thousand 00518 * qIsProt - true if query side is a protein. 00519 * tIsProt - true if target (database) side is a protein. 00520 * noHead - if true suppress header in psl/pslx output. 00521 * databaseName - name of database. Only used for blast output 00522 * databaseSeqCount - number of sequences in database - only for blast 00523 * databaseLetters - number of bases/aas in database - only blast 00524 * minIdentity - minimum identity - only blast 00525 * FILE *f - file. 00526 */ 00527 { 00528 struct gfOutput *out = NULL; 00529 static char *blastTypes[] = {"blast", "wublast", "blast8", "blast9", "xml"}; 00530 00531 if (format == NULL) 00532 format = "psl"; 00533 if (sameWord(format, "psl")) 00534 out = gfOutputPsl(goodPpt, qIsProt, tIsProt, f, FALSE, noHead); 00535 else if (sameWord(format, "pslx")) 00536 out = gfOutputPsl(goodPpt, qIsProt, tIsProt, f, TRUE, noHead); 00537 else if (sameWord(format, "sim4")) 00538 out = gfOutputSim4(goodPpt, qIsProt, tIsProt, databaseName); 00539 else if (stringArrayIx(format, blastTypes, ArraySize(blastTypes)) >= 0) 00540 out = gfOutputBlast(goodPpt, qIsProt, tIsProt, 00541 databaseName, databaseSeqCount, databaseLetters, format, 00542 minIdentity, f); 00543 else if (sameWord(format, "axt")) 00544 out = gfOutputAxt(goodPpt, qIsProt, tIsProt, f); 00545 else if (sameWord(format, "maf")) 00546 out = gfOutputMaf(goodPpt, qIsProt, tIsProt, f); 00547 else 00548 errAbort("Unrecognized output format '%s'", format); 00549 return out; 00550 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfOutput* gfOutputAxt | ( | int | goodPpt, | |
| boolean | qIsProt, | |||
| boolean | tIsProt, | |||
| FILE * | f | |||
| ) | [read] |
Definition at line 459 of file gfOut.c.
References axtQueryOut(), gfOutputAxtMem(), and gfOutput::out.
Referenced by gfOutputAny().
00462 { 00463 struct gfOutput *out = gfOutputAxtMem(goodPpt, qIsProt, tIsProt); 00464 out->queryOut = axtQueryOut; 00465 return out; 00466 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfOutput* gfOutputAxtMem | ( | int | goodPpt, | |
| boolean | qIsProt, | |||
| boolean | tIsProt | |||
| ) | [read] |
Definition at line 447 of file gfOut.c.
References AllocVar, gfOutput::data, gfOutputInit(), gfOutput::out, and saveAxtBundle().
Referenced by gfOutputAxt(), gfOutputBlast(), gfOutputMaf(), and gfOutputSim4().
00450 { 00451 struct gfOutput *out = gfOutputInit(goodPpt, qIsProt, tIsProt); 00452 struct axtData *ad; 00453 AllocVar(ad); 00454 out->out = saveAxtBundle; 00455 out->data = ad; 00456 return out; 00457 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfOutput* gfOutputBlast | ( | int | goodPpt, | |
| boolean | qIsProt, | |||
| boolean | tIsProt, | |||
| char * | databaseName, | |||
| int | databaseSeqCount, | |||
| double | databaseLetters, | |||
| char * | blastType, | |||
| double | minIdentity, | |||
| FILE * | f | |||
| ) | [read] |
Definition at line 491 of file gfOut.c.
References blastQueryOut(), axtData::blastType, gfOutput::data, axtData::databaseLetters, axtData::databaseName, axtData::databaseSeqCount, gfOutputAxtMem(), axtData::minIdentity, gfOutput::out, and gfOutput::queryOut.
Referenced by gfOutputAny().
00496 { 00497 struct gfOutput *out = gfOutputAxtMem(goodPpt, qIsProt, tIsProt); 00498 struct axtData *ad = out->data; 00499 ad->databaseName = databaseName; 00500 ad->databaseSeqCount = databaseSeqCount; 00501 ad->databaseLetters = databaseLetters; 00502 ad->blastType = blastType; 00503 ad->minIdentity = minIdentity; 00504 out->queryOut = blastQueryOut; 00505 return out; 00506 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfOutputFree | ( | struct gfOutput ** | pOut | ) |
| void gfOutputHead | ( | struct gfOutput * | out, | |
| FILE * | f | |||
| ) |
Definition at line 560 of file gfOut.c.
References gfOutput::out.
Referenced by gfClient(), and searchOneIndex().
Here is the caller graph for this function:

| struct gfOutput* gfOutputPsl | ( | int | goodPpt, | |
| boolean | qIsProt, | |||
| boolean | tIsProt, | |||
| FILE * | f, | |||
| boolean | saveSeq, | |||
| boolean | noHead | |||
| ) | [read] |
Definition at line 429 of file gfOut.c.
References AllocVar, gfOutput::data, pslxData::f, gfOutput::fileHead, gfOutputInit(), gfOutput::out, pslHead(), pslOut(), and pslxData::saveSeq.
Referenced by doBlat(), and gfOutputAny().
00433 { 00434 struct gfOutput *out = gfOutputInit(goodPpt, qIsProt, tIsProt); 00435 struct pslxData *pslData; 00436 00437 AllocVar(pslData); 00438 pslData->saveSeq = saveSeq; 00439 pslData->f = f; 00440 out->out = pslOut; 00441 out->data = pslData; 00442 if (!noHead) 00443 out->fileHead = pslHead; 00444 return out; 00445 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfOutputQuery | ( | struct gfOutput * | out, | |
| FILE * | f | |||
| ) |
Definition at line 552 of file gfOut.c.
References gfOutput::out.
Referenced by bigBlat(), doBlat(), gfClient(), and searchOne().
Here is the caller graph for this function:

| struct gfClump* gfPcrClumps | ( | struct genoFind * | gf, | |
| char * | fPrimer, | |||
| int | fPrimerSize, | |||
| char * | rPrimer, | |||
| int | rPrimerSize, | |||
| int | minDistance, | |||
| int | maxDistance | |||
| ) | [read] |
Definition at line 2155 of file genoFind.c.
References errAbort(), genoFind::isPep, pcrClumps(), reverseComplement(), genoFind::segSize, and tolowers().
Referenced by genoPcrDirect(), and pcrQuery().
02158 { 02159 struct gfClump *clumpList; 02160 if (gf->segSize > 0) 02161 errAbort("Can't do PCR on large tile sizes"); 02162 if (gf->isPep) 02163 errAbort("Can't do PCR on protein/translated index"); 02164 tolowers(fPrimer); 02165 tolowers(rPrimer); 02166 reverseComplement(rPrimer, rPrimerSize); 02167 clumpList = pcrClumps(gf, fPrimer, fPrimerSize, rPrimer, rPrimerSize, 02168 minDistance, maxDistance); 02169 reverseComplement(rPrimer, rPrimerSize); 02170 return clumpList; 02171 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int gfReadMulti | ( | int | sd, | |
| void * | vBuf, | |||
| size_t | size | |||
| ) |
Definition at line 44 of file genoFind.c.
Referenced by startServer().
00046 { 00047 char *buf = vBuf; 00048 size_t totalRead = 0; 00049 int oneRead; 00050 00051 while (totalRead < size) 00052 { 00053 oneRead = read(sd, buf + totalRead, size - totalRead); 00054 if (oneRead < 0) 00055 { 00056 perror("Couldn't finish large read"); 00057 return oneRead; 00058 } 00059 else if (oneRead == 0) 00060 /* Avoid an infinite loop when the client closed the socket. */ 00061 break; 00062 totalRead += oneRead; 00063 } 00064 return totalRead; 00065 }
Here is the caller graph for this function:

| char* gfSignature | ( | ) |
Definition at line 22 of file genoFind.c.
Referenced by getFileList(), gfPcrGetRanges(), pcrServer(), queryServer(), startSeqQuery(), startServer(), statusServer(), and stopServer().
Here is the caller graph for this function:

| void gfTransFindClumps | ( | struct genoFind * | gfs[3], | |
| aaSeq * | seq, | |||
| struct gfClump * | clumps[3], | |||
| struct lm * | lm, | |||
| int * | retHitCount | |||
| ) |
Definition at line 1897 of file genoFind.c.
References gfFindClumps(), gfClump::hitCount, and lm.
Referenced by gfFindAlignAaTrans(), gfTransTransFindClumps(), and transQuery().
01899 { 01900 int frame; 01901 int oneHit; 01902 int hitCount = 0; 01903 for (frame = 0; frame < 3; ++frame) 01904 { 01905 clumps[frame] = gfFindClumps(gfs[frame], seq, lm, &oneHit); 01906 hitCount += oneHit; 01907 } 01908 *retHitCount = hitCount; 01909 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfTransTransFindClumps | ( | struct genoFind * | gfs[3], | |
| aaSeq * | seqs[3], | |||
| struct gfClump * | clumps[3][3], | |||
| struct lm * | lm, | |||
| int * | retHitCount | |||
| ) |
Definition at line 1911 of file genoFind.c.
References gfTransFindClumps(), gfClump::hitCount, and lm.
Referenced by gfTransTransFindBundles(), and transTransQuery().
01915 { 01916 int qFrame; 01917 int oneHit; 01918 int hitCount = 0; 01919 01920 for (qFrame = 0; qFrame<3; ++qFrame) 01921 { 01922 gfTransFindClumps(gfs, seqs[qFrame], clumps[qFrame], lm, &oneHit); 01923 hitCount += oneHit; 01924 } 01925 *retHitCount = hitCount; 01926 }
Here is the call graph for this function:

Here is the caller graph for this function:

1.5.2