#include "common.h"#include <signal.h>#include "obscure.h"#include "dnautil.h"#include "dnaseq.h"#include "nib.h"#include "twoBit.h"#include "fa.h"#include "dystring.h"#include "errabort.h"#include "sig.h"#include "ooc.h"#include "genoFind.h"#include "trans3.h"#include "binRange.h"Include dependency graph for genoFind.c:

Go to the source code of this file.
Functions | |
| char * | gfSignature () |
| static void | gfPipeHandler (int sigNum) |
| void | gfCatchPipes () |
| int | gfReadMulti (int sd, void *vBuf, size_t size) |
| void | genoFindFree (struct genoFind **pGenoFind) |
| int | gfPowerOf20 (int n) |
| void | gfCheckTileSize (int tileSize, boolean isPep) |
| static struct genoFind * | gfNewEmpty (int minMatch, int maxGap, int tileSize, int stepSize, int maxPat, char *oocFile, boolean isPep, boolean allowOneMismatch) |
| static void | initNtLookup () |
| int | gfDnaTile (DNA *dna, int n) |
| int | gfPepTile (AA *pep, int n) |
| static void | gfCountSeq (struct genoFind *gf, bioSeq *seq) |
| static int | gfCountTilesInNib (struct genoFind *gf, int stepSize, char *fileName) |
| long long | maxTotalBases () |
| static long long | twoBitCheckTotalSize (struct twoBitFile *tbf) |
| static void | gfCountTilesInTwoBit (struct genoFind *gf, int stepSize, char *fileName, int *retSeqCount, long long *retBaseCount) |
| static int | gfAllocLists (struct genoFind *gf) |
| static int | gfAllocLargeLists (struct genoFind *gf) |
| static void | gfAddSeq (struct genoFind *gf, bioSeq *seq, bits32 offset) |
| static void | gfAddLargeSeq (struct genoFind *gf, bioSeq *seq, bits32 offset) |
| static int | gfAddTilesInNib (struct genoFind *gf, char *fileName, bits32 offset, int stepSize) |
| static void | gfZeroOverused (struct genoFind *gf) |
| static void | gfZeroNonOverused (struct genoFind *gf) |
| genoFind * | gfIndexNibsAndTwoBits (int fileCount, char *fileNames[], int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean allowOneMismatch, int stepSize) |
| static void | maskSimplePepRepeat (struct genoFind *gf) |
| dnaSeq * | readMaskedNib (char *fileName, boolean mask) |
| dnaSeq * | readMaskedTwoBit (struct twoBitFile *tbf, char *seqName, boolean mask) |
| static void | transCountBothStrands (struct dnaSeq *seq, struct genoFind *transGf[2][3]) |
| static void | transIndexBothStrands (struct dnaSeq *seq, struct genoFind *transGf[2][3], bits32 offset[2][3], int sourceIx, char *fileName) |
| void | gfIndexTransNibsAndTwoBits (struct genoFind *transGf[2][3], int fileCount, char *fileNames[], int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean allowOneMismatch, boolean doMask, int stepSize) |
| static struct genoFind * | gfSmallIndexSeq (struct genoFind *gf, bioSeq *seqList, int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean isPep, boolean maskUpper) |
| static struct genoFind * | gfLargeIndexSeq (struct genoFind *gf, bioSeq *seqList, int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean isPep, boolean maskUpper) |
| genoFind * | gfIndexSeq (bioSeq *seqList, int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean isPep, boolean allowOneMismatch, boolean maskUpper, int stepSize) |
| static int | bCmpSeqSource (const void *vTarget, const void *vRange) |
| static struct gfSeqSource * | findSource (struct genoFind *gf, bits32 targetPos) |
| void | gfClumpFree (struct gfClump **pClump) |
| void | gfClumpFreeList (struct gfClump **pList) |
| void | gfClumpDump (struct genoFind *gf, struct gfClump *clump, FILE *f) |
| static void | gfHitSort2 (struct gfHit **ptArray, int n) |
| void | gfHitSortDiagonal (struct gfHit **pList) |
| static int | gfHitCmpTarget (const void *va, const void *vb) |
| static int | gfHitCmpQuery (const void *va, const void *vb) |
| static void | gfClumpComputeQueryCoverage (struct gfClump *clumpList, int tileSize) |
| static int | gfClumpCmpQueryCoverage (const void *va, const void *vb) |
| static void | findClumpBounds (struct gfClump *clump, int tileSize) |
| static void | targetClump (struct genoFind *gf, struct gfClump **pClumpList, struct gfClump *clump) |
| static struct gfClump * | clumpNear (struct genoFind *gf, struct gfClump *oldClumps, int minMatch) |
| static struct gfClump * | clumpHits (struct genoFind *gf, struct gfHit *hitList, int minMatch) |
| static struct gfHit * | gfFastFindDnaHits (struct genoFind *gf, struct dnaSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) |
| static struct gfHit * | gfStraightFindHits (struct genoFind *gf, aaSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) |
| static struct gfHit * | gfStraightFindNearHits (struct genoFind *gf, aaSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) |
| static struct gfHit * | gfSegmentedFindHits (struct genoFind *gf, aaSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) |
| static struct gfHit * | gfSegmentedFindNearHits (struct genoFind *gf, aaSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) |
| static struct gfHit * | gfFindHitsWithQmask (struct genoFind *gf, bioSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) |
| gfClump * | gfFindClumpsWithQmask (struct genoFind *gf, bioSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount) |
| gfHit * | gfFindHitsInRegion (struct genoFind *gf, bioSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, struct gfSeqSource *target, int tMin, int tMax) |
| gfClump * | gfFindClumps (struct genoFind *gf, bioSeq *seq, struct lm *lm, int *retHitCount) |
| void | gfTransFindClumps (struct genoFind *gfs[3], aaSeq *seq, struct gfClump *clumps[3], struct lm *lm, int *retHitCount) |
| void | gfTransTransFindClumps (struct genoFind *gfs[3], aaSeq *seqs[3], struct gfClump *clumps[3][3], struct lm *lm, int *retHitCount) |
| void | gfMakeOoc (char *outName, char *files[], int fileCount, int tileSize, bits32 maxPat, enum gfType tType) |
| gfSeqSource * | gfFindNamedSource (struct genoFind *gf, char *name) |
| static void | mergeAdd (struct binKeeper *bk, int start, int end, struct gfSeqSource *src) |
| static struct gfClump * | pcrClumps (struct genoFind *gf, char *fPrimer, int fPrimerSize, char *rPrimer, int rPrimerSize, int minDistance, int maxDistance) |
| gfClump * | gfPcrClumps (struct genoFind *gf, char *fPrimer, int fPrimerSize, char *rPrimer, int rPrimerSize, int minDistance, int maxDistance) |
Variables | |
| static char const | rcsid [] = "$Id: genoFind.c,v 1.23 2007/02/04 05:09:02 kent Exp $" |
| volatile boolean | pipeBroke = FALSE |
| static int | ntLookup [256] |
| static struct gfHit ** | nosTemp |
| static struct gfHit * | nosSwap |
| static int | cmpQuerySize |
| static int | gfNearEnough = 300 |
| static int bCmpSeqSource | ( | const void * | vTarget, | |
| const void * | vRange | |||
| ) | [static] |
Definition at line 929 of file genoFind.c.
References bits32, ss, and gfSeqSource::start.
Referenced by findSource().
00931 { 00932 const bits32 *pTarget = vTarget; 00933 bits32 target = *pTarget; 00934 const struct gfSeqSource *ss = vRange; 00935 00936 if (target < ss->start) return -1; 00937 if (target >= ss->end) return 1; 00938 return 0; 00939 }
Here is the caller graph for this function:

| static struct gfClump* clumpHits | ( | struct genoFind * | gf, | |
| struct gfHit * | hitList, | |||
| int | minMatch | |||
| ) | [static, read] |
Definition at line 1337 of file genoFind.c.
References AllocArray, AllocVar, bits32, clumpNear(), gfHit::diagonal, freez(), gfClumpCmpQueryCoverage(), gfClumpComputeQueryCoverage(), gfHitSortDiagonal(), gfNearEnough, genoFind::isPep, genoFind::maxGap, maxGap, gfClump::next, gfHit::next, slAddHead, slCat(), slSort(), genoFind::tileSize, tileSize, genoFind::totalSeqSize, gfHit::tStart, and uglyf.
Referenced by gfFindClumpsWithQmask().
01339 { 01340 struct gfClump *clumpList = NULL, *clump = NULL; 01341 int maxGap = gf->maxGap; 01342 struct gfHit *hit, *nextHit, *lastHit = NULL; 01343 int totalHits = 0, usedHits = 0, clumpCount = 0; 01344 int tileSize = gf->tileSize; 01345 int bucketShift = 16; /* 64k buckets. */ 01346 bits32 bucketSize = (1<<bucketShift); 01347 int bucketCount = (gf->totalSeqSize >> bucketShift) + 1; 01348 int nearEnough = (gf->isPep ? gfNearEnough/3 : gfNearEnough); 01349 bits32 boundary = bucketSize - nearEnough; 01350 int i; 01351 struct gfHit **buckets = NULL, **pb; 01352 01353 /* Sort hit list into buckets. */ 01354 AllocArray(buckets, bucketCount); 01355 for (hit = hitList; hit != NULL; hit = nextHit) 01356 { 01357 nextHit = hit->next; 01358 pb = buckets + (hit->tStart >> bucketShift); 01359 slAddHead(pb, hit); 01360 } 01361 01362 /* Sort each bucket on diagonal and clump. */ 01363 for (i=0; i<bucketCount; ++i) 01364 { 01365 int clumpSize; 01366 bits32 maxT; 01367 struct gfHit *clumpHits; 01368 pb = buckets + i; 01369 gfHitSortDiagonal(pb); 01370 for (hit = *pb; hit != NULL; ) 01371 { 01372 /* Each time through this loop will get info on a clump. Will only 01373 * actually create clump if it is big enough though. */ 01374 clumpSize = 0; 01375 clumpHits = lastHit = NULL; 01376 maxT = 0; 01377 for (; hit != NULL; hit = nextHit) 01378 { 01379 nextHit = hit->next; 01380 if (lastHit != NULL && hit->diagonal - lastHit->diagonal > maxGap) 01381 break; 01382 if (hit->tStart > maxT) maxT = hit->tStart; 01383 slAddHead(&clumpHits, hit); 01384 ++clumpSize; 01385 ++totalHits; 01386 lastHit = hit; 01387 } 01388 if (maxT > boundary && i < bucketCount-1) 01389 { 01390 /* Move clumps that are near boundary to next bucket to give them a 01391 * chance to merge with hits there. */ 01392 buckets[i+1] = slCat(clumpHits, buckets[i+1]); 01393 } 01394 else if (clumpSize >= minMatch) 01395 { 01396 /* Save clumps that are large enough on list. */ 01397 AllocVar(clump); 01398 slAddHead(&clumpList, clump); 01399 clump->hitCount = clumpSize; 01400 clump->hitList = clumpHits; 01401 usedHits += clumpSize; 01402 ++clumpCount; 01403 } 01404 } 01405 *pb = NULL; 01406 boundary += bucketSize; 01407 } 01408 clumpList = clumpNear(gf, clumpList, minMatch); 01409 gfClumpComputeQueryCoverage(clumpList, tileSize); /* Thanks AG */ 01410 slSort(&clumpList, gfClumpCmpQueryCoverage); 01411 01412 #ifdef DEBUG 01413 uglyf("Dumping clumps B\n"); 01414 for (clump = clumpList; clump != NULL; clump = clump->next) /* uglyf */ 01415 { 01416 uglyf(" %d %d %s %d %d (%d hits)\n", clump->qStart, clump->qEnd, clump->target->seq->name, clump->tStart, clump->tEnd, clump->hitCount); 01417 } 01418 #endif /* DEBUG */ 01419 freez(&buckets); 01420 return clumpList; 01421 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct gfClump* clumpNear | ( | struct genoFind * | gf, | |
| struct gfClump * | oldClumps, | |||
| int | minMatch | |||
| ) | [static, read] |
Definition at line 1276 of file genoFind.c.
References AllocVar, bits32, findClumpBounds(), gfClumpFree(), gfHitCmpTarget(), gfNearEnough, genoFind::isPep, gfHit::next, slAddHead, slReverse(), slSort(), targetClump(), genoFind::tileSize, tileSize, and gfHit::tStart.
Referenced by clumpHits().
01279 { 01280 struct gfClump *newClumps = NULL, *clump, *nextClump; 01281 struct gfHit *hit, *nextHit; 01282 int tileSize = gf->tileSize; 01283 bits32 lastT; 01284 int nearEnough = (gf->isPep ? gfNearEnough/3 : gfNearEnough); 01285 01286 for (clump = oldClumps; clump != NULL; clump = nextClump) 01287 { 01288 struct gfHit *newHits = NULL, *oldHits = clump->hitList; 01289 int clumpSize = 0; 01290 clump->hitCount = 0; 01291 clump->hitList = NULL; /* Clump no longer owns list. */ 01292 nextClump = clump->next; 01293 slSort(&oldHits, gfHitCmpTarget); 01294 lastT = oldHits->tStart; 01295 for (hit = oldHits; hit != NULL; hit = nextHit) 01296 { 01297 nextHit = hit->next; 01298 if (hit->tStart > nearEnough + lastT) 01299 { 01300 if (clumpSize >= minMatch) 01301 { 01302 slReverse(&newHits); 01303 clump->hitList = newHits; 01304 newHits = NULL; 01305 clump->hitCount = clumpSize; 01306 findClumpBounds(clump, tileSize); 01307 targetClump(gf, &newClumps, clump); 01308 AllocVar(clump); 01309 } 01310 else 01311 { 01312 newHits = NULL; 01313 clump->hitCount = 0; 01314 } 01315 clumpSize = 0; 01316 } 01317 lastT = hit->tStart; 01318 slAddHead(&newHits, hit); 01319 ++clumpSize; 01320 } 01321 slReverse(&newHits); 01322 clump->hitList = newHits; 01323 if (clumpSize >= minMatch) 01324 { 01325 clump->hitCount = clumpSize; 01326 findClumpBounds(clump, tileSize); 01327 targetClump(gf, &newClumps, clump); 01328 } 01329 else 01330 { 01331 gfClumpFree(&clump); 01332 } 01333 } 01334 return newClumps; 01335 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void findClumpBounds | ( | struct gfClump * | clump, | |
| int | tileSize | |||
| ) | [static] |
Definition at line 1189 of file genoFind.c.
References gfClump::hitList, gfHit::next, gfClump::qEnd, gfHit::qStart, gfClump::qStart, gfClump::tEnd, gfHit::tStart, and gfClump::tStart.
Referenced by clumpNear(), and targetClump().
01191 { 01192 struct gfHit *hit; 01193 int x; 01194 hit = clump->hitList; 01195 if (hit == NULL) 01196 return; 01197 clump->qStart = clump->qEnd = hit->qStart; 01198 clump->tStart = clump->tEnd = hit->tStart; 01199 for (hit = hit->next; hit != NULL; hit = hit->next) 01200 { 01201 x = hit->qStart; 01202 if (x < clump->qStart) clump->qStart = x; 01203 if (x > clump->qEnd) clump->qEnd = x; 01204 x = hit->tStart; 01205 if (x < clump->tStart) clump->tStart = x; 01206 if (x > clump->tEnd) clump->tEnd = x; 01207 } 01208 clump->tEnd += tileSize; 01209 clump->qEnd += tileSize; 01210 }
Here is the caller graph for this function:

| static struct gfSeqSource* findSource | ( | struct genoFind * | gf, | |
| bits32 | targetPos | |||
| ) | [static, read] |
Definition at line 941 of file genoFind.c.
References bCmpSeqSource(), errAbort(), genoFind::sourceCount, genoFind::sources, and ss.
Referenced by gfFastFindDnaHits(), gfSegmentedFindHits(), gfSegmentedFindNearHits(), gfStraightFindHits(), gfStraightFindNearHits(), pcrClumps(), and targetClump().
00943 { 00944 struct gfSeqSource *ss = bsearch(&targetPos, gf->sources, gf->sourceCount, 00945 sizeof(gf->sources[0]), bCmpSeqSource); 00946 if (ss == NULL) 00947 errAbort("Couldn't find source for %d", targetPos); 00948 return ss; 00949 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void genoFindFree | ( | struct genoFind ** | pGenoFind | ) |
Definition at line 67 of file genoFind.c.
References genoFind::allocated, bitFree(), freeMem(), freez(), genoFind::lists, genoFind::listSizes, gfSeqSource::maskedBits, genoFind::sourceCount, and genoFind::sources.
Referenced by bigBlat(), genoFindDirect(), genoPcrDirect(), and gfMakeOoc().
00069 { 00070 struct genoFind *gf = *pGenoFind; 00071 int i; 00072 struct gfSeqSource *sources; 00073 if (gf != NULL) 00074 { 00075 freeMem(gf->lists); 00076 freeMem(gf->listSizes); 00077 freeMem(gf->allocated); 00078 if ((sources = gf->sources) != NULL) 00079 { 00080 for (i=0; i<gf->sourceCount; ++i) 00081 bitFree(&sources[i].maskedBits); 00082 freeMem(sources); 00083 } 00084 freez(pGenoFind); 00085 } 00086 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 413 of file genoFind.c.
References bits16, bits32, dnaSeq::dna, genoFind::endLists, gfDnaTile(), gfPepTile(), initNtLookup(), genoFind::isPep, genoFind::listSizes, genoFind::segSize, dnaSeq::size, genoFind::stepSize, stepSize, genoFind::tileSize, and tileSize.
Referenced by gfLargeIndexSeq().
00415 { 00416 char *poly = seq->dna; 00417 int tileSize = gf->tileSize; 00418 int stepSize = gf->stepSize; 00419 int tileTailSize = gf->segSize; 00420 int tileHeadSize = tileSize - tileTailSize; 00421 int i, lastTile = seq->size - tileSize; 00422 int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); 00423 int tileHead; 00424 int tileTail; 00425 bits32 *listSizes = gf->listSizes; 00426 bits16 **endLists = gf->endLists; 00427 bits16 *endList; 00428 int headCount; 00429 00430 initNtLookup(); 00431 for (i=0; i<=lastTile; i += stepSize) 00432 { 00433 tileHead = makeTile(poly, tileHeadSize); 00434 tileTail = makeTile(poly + tileHeadSize, tileTailSize); 00435 if (tileHead >= 0 && tileTail >= 0) 00436 { 00437 endList = endLists[tileHead]; 00438 headCount = listSizes[tileHead]++; 00439 endList += headCount * 3; /* Because have three slots per. */ 00440 endList[0] = tileTail; 00441 endList[1] = (offset >> 16); 00442 endList[2] = (offset&0xffff); 00443 } 00444 offset += stepSize; 00445 poly += stepSize; 00446 } 00447 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 384 of file genoFind.c.
References bits32, dnaSeq::dna, gfDnaTile(), gfPepTile(), initNtLookup(), genoFind::isPep, genoFind::lists, genoFind::listSizes, genoFind::maxPat, dnaSeq::size, genoFind::stepSize, stepSize, genoFind::tileSize, and tileSize.
Referenced by gfAddTilesInNib(), gfIndexNibsAndTwoBits(), gfSmallIndexSeq(), and transIndexBothStrands().
00386 { 00387 char *poly = seq->dna; 00388 int tileSize = gf->tileSize; 00389 int stepSize = gf->stepSize; 00390 int i, lastTile = seq->size - tileSize; 00391 int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); 00392 int maxPat = gf->maxPat; 00393 int tile; 00394 bits32 *listSizes = gf->listSizes; 00395 bits32 **lists = gf->lists; 00396 00397 initNtLookup(); 00398 for (i=0; i<=lastTile; i += stepSize) 00399 { 00400 tile = makeTile(poly, tileSize); 00401 if (tile >= 0) 00402 { 00403 if (listSizes[tile] < maxPat) 00404 { 00405 lists[tile][listSizes[tile]++] = offset; 00406 } 00407 } 00408 offset += stepSize; 00409 poly += stepSize; 00410 } 00411 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int gfAddTilesInNib | ( | struct genoFind * | gf, | |
| char * | fileName, | |||
| bits32 | offset, | |||
| int | stepSize | |||
| ) | [static] |
Definition at line 451 of file genoFind.c.
References freeDnaSeq(), gfAddSeq(), nibLdPart(), nibOpenVerify(), genoFind::tileSize, and tileSize.
Referenced by gfIndexNibsAndTwoBits().
00454 { 00455 FILE *f; 00456 int tileSize = gf->tileSize; 00457 int bufSize = tileSize * 16*1024; 00458 int nibSize, i; 00459 int endBuf, basesInBuf; 00460 struct dnaSeq *seq; 00461 00462 printf("Adding tiles in %s\n", fileName); 00463 nibOpenVerify(fileName, &f, &nibSize); 00464 for (i=0; i < nibSize; i = endBuf) 00465 { 00466 endBuf = i + bufSize; 00467 if (endBuf >= nibSize) endBuf = nibSize; 00468 basesInBuf = endBuf - i; 00469 seq = nibLdPart(fileName, f, nibSize, i, basesInBuf); 00470 gfAddSeq(gf, seq, i + offset); 00471 freeDnaSeq(&seq); 00472 } 00473 fclose(f); 00474 return nibSize; 00475 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int gfAllocLargeLists | ( | struct genoFind * | gf | ) | [static] |
Definition at line 358 of file genoFind.c.
References genoFind::allocated, bits16, bits32, genoFind::endLists, genoFind::listSizes, needHugeMem(), and genoFind::tileSpaceSize.
Referenced by gfLargeIndexSeq().
00361 { 00362 int count = 0; 00363 int i; 00364 bits32 *listSizes = gf->listSizes; 00365 bits16 **endLists = gf->endLists; 00366 bits16 *allocated = NULL; 00367 int size; 00368 int tileSpaceSize = gf->tileSpaceSize; 00369 00370 for (i=0; i<tileSpaceSize; ++i) 00371 count += listSizes[i]; 00372 if (count > 0) 00373 gf->allocated = allocated = needHugeMem(3*count*sizeof(allocated[0])); 00374 for (i=0; i<tileSpaceSize; ++i) 00375 { 00376 size = listSizes[i]; 00377 endLists[i] = allocated; 00378 allocated += 3*size; 00379 } 00380 return count; 00381 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int gfAllocLists | ( | struct genoFind * | gf | ) | [static] |
Definition at line 317 of file genoFind.c.
References genoFind::allocated, bits32, genoFind::lists, genoFind::listSizes, genoFind::maxPat, needHugeMem(), and genoFind::tileSpaceSize.
Referenced by gfIndexNibsAndTwoBits(), gfIndexTransNibsAndTwoBits(), and gfSmallIndexSeq().
00320 { 00321 int oneCount; 00322 int count = 0; 00323 int i; 00324 bits32 *listSizes = gf->listSizes; 00325 bits32 **lists = gf->lists; 00326 bits32 *allocated = NULL; 00327 bits32 maxPat = gf->maxPat; 00328 int size; 00329 int usedCount = 0, overusedCount = 0; 00330 int tileSpaceSize = gf->tileSpaceSize; 00331 00332 for (i=0; i<tileSpaceSize; ++i) 00333 { 00334 /* If pattern is too much used it's no good to us, ignore. */ 00335 if ((oneCount = listSizes[i]) < maxPat) 00336 { 00337 count += oneCount; 00338 usedCount += 1; 00339 } 00340 else 00341 { 00342 overusedCount += 1; 00343 } 00344 } 00345 if (count > 0) 00346 gf->allocated = allocated = needHugeMem(count*sizeof(allocated[0])); 00347 for (i=0; i<tileSpaceSize; ++i) 00348 { 00349 if ((size = listSizes[i]) < maxPat) 00350 { 00351 lists[i] = allocated; 00352 allocated += size; 00353 } 00354 } 00355 return count; 00356 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfCatchPipes | ( | ) |
Definition at line 38 of file genoFind.c.
References gfPipeHandler().
Referenced by main().
00040 { 00041 signal(SIGPIPE, gfPipeHandler); 00042 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfCheckTileSize | ( | int | tileSize, | |
| boolean | isPep | |||
| ) |
Definition at line 97 of file genoFind.c.
References errAbort().
Referenced by gfNewEmpty(), and main().
00099 { 00100 if (isPep) 00101 { 00102 if (tileSize < 3 || tileSize > 8) 00103 errAbort("protein tileSize must be between 3 and 8"); 00104 } 00105 else 00106 { 00107 if (tileSize < 6 || tileSize > 18) 00108 errAbort("DNA tileSize must be between 6 and 18"); 00109 } 00110 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int gfClumpCmpQueryCoverage | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 1180 of file genoFind.c.
References gfClump::queryCoverage.
Referenced by clumpHits().
01182 { 01183 const struct gfClump *a = *((struct gfClump **)va); 01184 const struct gfClump *b = *((struct gfClump **)vb); 01185 01186 return (b->queryCoverage - a->queryCoverage); 01187 }
Here is the caller graph for this function:

| static void gfClumpComputeQueryCoverage | ( | struct gfClump * | clumpList, | |
| int | tileSize | |||
| ) | [static] |
Definition at line 1140 of file genoFind.c.
References gfHitCmpQuery(), gfClump::hitList, gfHit::next, gfClump::next, gfHit::qStart, gfClump::queryCoverage, and slSort().
Referenced by clumpHits().
01143 { 01144 struct gfClump *clump; 01145 struct gfHit *hit; 01146 struct gfHit *hitList; 01147 int qcov; 01148 int blockStart, blockEnd; 01149 01150 for (clump = clumpList; clump != NULL; clump = clump->next) 01151 { 01152 hitList = clump->hitList; 01153 if ( hitList != NULL) 01154 { 01155 slSort(&hitList, gfHitCmpQuery); 01156 qcov = 0; 01157 blockStart = hitList->qStart; 01158 blockEnd = blockStart + tileSize; 01159 for (hit = hitList; hit != NULL; hit = hit->next) 01160 { 01161 if (hit->qStart > blockEnd) 01162 { 01163 qcov += blockEnd - blockStart; 01164 blockStart = hit->qStart; 01165 blockEnd = blockStart + tileSize; 01166 } 01167 else if (hit->qStart + tileSize > blockEnd) 01168 { 01169 blockEnd = hit->qStart + tileSize; 01170 } 01171 } 01172 qcov += blockEnd - blockStart; 01173 clump->queryCoverage = qcov; 01174 } 01175 } 01176 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 973 of file genoFind.c.
References gfClump::hitCount, gfClump::hitList, name, gfHit::next, gfClump::qEnd, gfClump::qStart, ss, gfClump::target, gfClump::tEnd, and gfClump::tStart.
Referenced by genoFindDirect(), and genoPcrDirect().
00975 { 00976 struct gfSeqSource *ss = clump->target; 00977 char *name = ss->fileName; 00978 00979 if (name == NULL) name = ss->seq->name; 00980 fprintf(f, "%u-%u %s %u-%u, hits %d\n", 00981 clump->qStart, clump->qEnd, name, 00982 clump->tStart - ss->start, clump->tEnd - ss->start, 00983 clump->hitCount); 00984 #ifdef SOMETIMES 00985 for (hit = clump->hitList; hit != NULL; hit = hit->next) 00986 fprintf(f, " q %d, t %d, diag %d\n", hit->qStart, hit->tStart, hit->diagonal); 00987 #endif 00988 }
Here is the caller graph for this function:

| void gfClumpFree | ( | struct gfClump ** | pClump | ) |
Definition at line 951 of file genoFind.c.
References freez().
Referenced by clumpNear(), gfClumpFreeList(), and targetClump().
00953 { 00954 struct gfClump *clump; 00955 if ((clump = *pClump) == NULL) 00956 return; 00957 freez(pClump); 00958 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfClumpFreeList | ( | struct gfClump ** | pList | ) |
Definition at line 960 of file genoFind.c.
References gfClumpFree(), and gfClump::next.
Referenced by dnaQuery(), ffSeedExtInMem(), genoFindDirect(), gfAlignTrans(), gfFindAlignAaTrans(), gfTransTransFindBundles(), pcrQuery(), searchOneProt(), transQuery(), and transTransQuery().
00962 { 00963 struct gfClump *el, *next; 00964 00965 for (el = *pList; el != NULL; el = next) 00966 { 00967 next = el->next; 00968 gfClumpFree(&el); 00969 } 00970 *pList = NULL; 00971 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 222 of file genoFind.c.
References bits32, dnaSeq::dna, gfDnaTile(), gfPepTile(), initNtLookup(), genoFind::isPep, genoFind::listSizes, genoFind::maxPat, genoFind::segSize, dnaSeq::size, genoFind::stepSize, stepSize, genoFind::tileSize, and tileSize.
Referenced by gfCountTilesInNib(), gfCountTilesInTwoBit(), gfLargeIndexSeq(), gfMakeOoc(), gfSmallIndexSeq(), and transCountBothStrands().
00224 { 00225 char *poly = seq->dna; 00226 int tileSize = gf->tileSize; 00227 int stepSize = gf->stepSize; 00228 int tileHeadSize = gf->tileSize - gf->segSize; 00229 int maxPat = gf->maxPat; 00230 int tile; 00231 bits32 *listSizes = gf->listSizes; 00232 int i, lastTile = seq->size - tileSize; 00233 int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); 00234 00235 initNtLookup(); 00236 for (i=0; i<=lastTile; i += stepSize) 00237 { 00238 if ((tile = makeTile(poly, tileHeadSize)) >= 0) 00239 { 00240 if (listSizes[tile] < maxPat) 00241 { 00242 listSizes[tile] += 1; 00243 } 00244 } 00245 poly += stepSize; 00246 } 00247 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int gfCountTilesInNib | ( | struct genoFind * | gf, | |
| int | stepSize, | |||
| char * | fileName | |||
| ) | [static] |
Definition at line 249 of file genoFind.c.
References freeDnaSeq(), gfCountSeq(), nibLdPart(), nibOpenVerify(), genoFind::tileSize, and tileSize.
Referenced by gfIndexNibsAndTwoBits().
00251 { 00252 FILE *f; 00253 int tileSize = gf->tileSize; 00254 int bufSize = tileSize * 16*1024; 00255 int nibSize, i; 00256 int endBuf, basesInBuf; 00257 struct dnaSeq *seq; 00258 00259 printf("Counting tiles in %s\n", fileName); 00260 nibOpenVerify(fileName, &f, &nibSize); 00261 for (i=0; i < nibSize; i = endBuf) 00262 { 00263 endBuf = i + bufSize; 00264 if (endBuf >= nibSize) endBuf = nibSize; 00265 basesInBuf = endBuf - i; 00266 seq = nibLdPart(fileName, f, nibSize, i, basesInBuf); 00267 gfCountSeq(gf, seq); 00268 freeDnaSeq(&seq); 00269 } 00270 fclose(f); 00271 return nibSize; 00272 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void gfCountTilesInTwoBit | ( | struct genoFind * | gf, | |
| int | stepSize, | |||
| char * | fileName, | |||
| int * | retSeqCount, | |||
| long long * | retBaseCount | |||
| ) | [static] |
Definition at line 293 of file genoFind.c.
References baseCount, freeDnaSeq(), gfCountSeq(), twoBitFile::indexList, twoBitIndex::name, twoBitIndex::next, twoBitCheckTotalSize(), twoBitClose(), twoBitOpen(), and twoBitReadSeqFragLower().
Referenced by gfIndexNibsAndTwoBits().
00297 { 00298 struct dnaSeq *seq; 00299 struct twoBitFile *tbf = twoBitOpen(fileName); 00300 struct twoBitIndex *index; 00301 int seqCount = 0; 00302 long long baseCount = twoBitCheckTotalSize(tbf); 00303 00304 printf("Counting tiles in %s\n", fileName); 00305 for (index = tbf->indexList; index != NULL; index = index->next) 00306 { 00307 seq = twoBitReadSeqFragLower(tbf, index->name, 0, 0); 00308 gfCountSeq(gf, seq); 00309 ++seqCount; 00310 freeDnaSeq(&seq); 00311 } 00312 twoBitClose(&tbf); 00313 *retSeqCount = seqCount; 00314 *retBaseCount = baseCount; 00315 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int gfDnaTile | ( | DNA * | dna, | |
| int | n | |||
| ) |
Definition at line 191 of file genoFind.c.
References ntLookup.
Referenced by gfAddLargeSeq(), gfAddSeq(), gfCountSeq(), gfSegmentedFindHits(), gfSegmentedFindNearHits(), gfStraightFindHits(), gfStraightFindNearHits(), and pcrClumps().
00193 { 00194 int tile = 0; 00195 int i, c; 00196 for (i=0; i<n; ++i) 00197 { 00198 tile <<= 2; 00199 if ((c = ntLookup[(int)dna[i]]) < 0) 00200 return -1; 00201 tile += c; 00202 } 00203 return tile; 00204 }
Here is the caller graph for this function:

| static struct gfHit* gfFastFindDnaHits | ( | struct genoFind * | gf, | |
| struct dnaSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| int * | retHitCount, | |||
| struct gfSeqSource * | target, | |||
| int | tMin, | |||
| int | tMax | |||
| ) | [static, read] |
Definition at line 1424 of file genoFind.c.
References bitCountRange(), bits32, dnaSeq::dna, findSource(), genoFind::lists, genoFind::listSizes, lm, lmAllocVar, mask, ntValNoN, gfHit::qStart, dnaSeq::size, slAddHead, genoFind::tileMask, genoFind::tileSize, and gfHit::tStart.
Referenced by gfFindHitsWithQmask().
01429 { 01430 struct gfHit *hitList = NULL, *hit; 01431 int size = seq->size; 01432 int tileSizeMinusOne = gf->tileSize - 1; 01433 int mask = gf->tileMask; 01434 DNA *dna = seq->dna; 01435 int i, j; 01436 bits32 bits = 0; 01437 bits32 bVal; 01438 int listSize; 01439 bits32 qStart, *tList; 01440 int hitCount = 0; 01441 01442 for (i=0; i<tileSizeMinusOne; ++i) 01443 { 01444 bVal = ntValNoN[(int)dna[i]]; 01445 bits <<= 2; 01446 bits += bVal; 01447 } 01448 for (i=tileSizeMinusOne; i<size; ++i) 01449 { 01450 bVal = ntValNoN[(int)dna[i]]; 01451 bits <<= 2; 01452 bits += bVal; 01453 bits &= mask; 01454 listSize = gf->listSizes[bits]; 01455 if (listSize != 0) 01456 { 01457 qStart = i-tileSizeMinusOne; 01458 if (qMaskBits == NULL || bitCountRange(qMaskBits, qStart+qMaskOffset, gf->tileSize) == 0) 01459 { 01460 tList = gf->lists[bits]; 01461 for (j=0; j<listSize; ++j) 01462 { 01463 int tStart = tList[j]; 01464 if (target == NULL || 01465 (target == findSource(gf, tStart) && tStart >= tMin && tStart < tMax) ) 01466 { 01467 lmAllocVar(lm, hit); 01468 hit->qStart = qStart; 01469 hit->tStart = tStart; 01470 hit->diagonal = tStart + size - qStart; 01471 slAddHead(&hitList, hit); 01472 ++hitCount; 01473 } 01474 } 01475 } 01476 } 01477 } 01478 *retHitCount = hitCount; 01479 return hitList; 01480 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfClump* gfFindClumps | ( | struct genoFind * | gf, | |
| bioSeq * | seq, | |||
| struct lm * | lm, | |||
| int * | retHitCount | |||
| ) | [read] |
Definition at line 1890 of file genoFind.c.
References gfFindClumpsWithQmask(), and lm.
Referenced by dnaQuery(), genoFindDirect(), gfTransFindClumps(), and searchOneProt().
01892 { 01893 return gfFindClumpsWithQmask(gf, seq, NULL, 0, lm, retHitCount); 01894 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfClump* gfFindClumpsWithQmask | ( | struct genoFind * | gf, | |
| bioSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| int * | retHitCount | |||
| ) | [read] |
Definition at line 1849 of file genoFind.c.
References clumpHits(), cmpQuerySize, gfFindHitsWithQmask(), lm, minMatch, genoFind::minMatch, dnaSeq::size, and genoFind::tileSize.
Referenced by ffSeedExtInMem(), and gfFindClumps().
01853 { 01854 struct gfClump *clumpList = NULL; 01855 struct gfHit *hitList; 01856 int minMatch = gf->minMatch; 01857 01858 #ifdef OLD /* stepSize makes this obsolete. */ 01859 if (seq->size < gf->tileSize * (gf->minMatch+1)) 01860 minMatch = 1; 01861 #endif /* OLD */ 01862 01863 hitList = gfFindHitsWithQmask(gf, seq, qMaskBits, qMaskOffset, lm, 01864 retHitCount, NULL, 0, 0); 01865 cmpQuerySize = seq->size; 01866 clumpList = clumpHits(gf, hitList, minMatch); 01867 return clumpList; 01868 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfHit* gfFindHitsInRegion | ( | struct genoFind * | gf, | |
| bioSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| struct gfSeqSource * | target, | |||
| int | tMin, | |||
| int | tMax | |||
| ) | [read] |
Definition at line 1870 of file genoFind.c.
References gfFindHitsWithQmask(), lm, gfHit::next, gfSeqSource::start, and gfHit::tStart.
Referenced by scanIndexForSmallExons().
01877 { 01878 int targetStart; 01879 struct gfHit *hitList, *hit; 01880 int hitCount; 01881 01882 targetStart = target->start; 01883 hitList = gfFindHitsWithQmask(gf, seq, qMaskBits, qMaskOffset, lm, 01884 &hitCount, target, tMin + targetStart, tMax + targetStart); 01885 for (hit = hitList; hit != NULL; hit = hit->next) 01886 hit->tStart -= targetStart; 01887 return hitList; 01888 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct gfHit* gfFindHitsWithQmask | ( | struct genoFind * | gf, | |
| bioSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| int * | retHitCount, | |||
| struct gfSeqSource * | target, | |||
| int | tMin, | |||
| int | tMax | |||
| ) | [static, read] |
Definition at line 1805 of file genoFind.c.
References genoFind::allowOneMismatch, gfFastFindDnaHits(), gfSegmentedFindHits(), gfSegmentedFindNearHits(), gfStraightFindHits(), gfStraightFindNearHits(), genoFind::isPep, lm, and genoFind::segSize.
Referenced by gfFindClumpsWithQmask(), and gfFindHitsInRegion().
01810 { 01811 struct gfHit *hitList = NULL; 01812 if (gf->segSize == 0 && !gf->isPep && !gf->allowOneMismatch) 01813 { 01814 hitList = gfFastFindDnaHits(gf, seq, qMaskBits, qMaskOffset, lm, retHitCount, 01815 target, tMin, tMax); 01816 } 01817 else 01818 { 01819 if (gf->segSize == 0) 01820 { 01821 if (gf->allowOneMismatch) 01822 { 01823 hitList = gfStraightFindNearHits(gf, seq, qMaskBits, qMaskOffset, lm, 01824 retHitCount, target, tMin, tMax); 01825 } 01826 else 01827 { 01828 hitList = gfStraightFindHits(gf, seq, qMaskBits, qMaskOffset, lm, retHitCount, 01829 target, tMin, tMax); 01830 } 01831 } 01832 else 01833 { 01834 if (gf->allowOneMismatch) 01835 { 01836 hitList = gfSegmentedFindNearHits(gf, seq, qMaskBits, qMaskOffset, lm, retHitCount, 01837 target, tMin, tMax); 01838 } 01839 else 01840 { 01841 hitList = gfSegmentedFindHits(gf, seq, qMaskBits, qMaskOffset, lm, retHitCount, 01842 target, tMin, tMax); 01843 } 01844 } 01845 } 01846 return hitList; 01847 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfSeqSource* gfFindNamedSource | ( | struct genoFind * | gf, | |
| char * | name | |||
| ) | [read] |
Definition at line 2010 of file genoFind.c.
References gfSeqSource::fileName, dnaSeq::name, sameString, gfSeqSource::seq, genoFind::sourceCount, genoFind::sources, and splitPath().
Referenced by refineBundle().
02012 { 02013 struct gfSeqSource *source = gf->sources; 02014 int count = gf->sourceCount; 02015 02016 if (source->seq == NULL) /* Use first source to see if seq or file. */ 02017 { 02018 char rootName[256]; 02019 while (--count >= 0) 02020 { 02021 splitPath(source->fileName, NULL, rootName, NULL); 02022 if (sameString(name, rootName)) 02023 return source; 02024 } 02025 } 02026 else 02027 { 02028 while (--count >= 0) 02029 { 02030 if (sameString(source->seq->name, name)) 02031 return source; 02032 source += 1; 02033 } 02034 } 02035 return NULL; 02036 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int gfHitCmpQuery | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 1126 of file genoFind.c.
References gfHit::qStart.
Referenced by gfClumpComputeQueryCoverage().
01128 { 01129 const struct gfHit *a = *((struct gfHit **)va); 01130 const struct gfHit *b = *((struct gfHit **)vb); 01131 01132 if (a->qStart > b->qStart) 01133 return 1; 01134 else if (a->qStart == b->qStart) 01135 return 0; 01136 else 01137 return -1; 01138 }
Here is the caller graph for this function:

| static int gfHitCmpTarget | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 1112 of file genoFind.c.
References gfHit::tStart.
Referenced by clumpNear().
01114 { 01115 const struct gfHit *a = *((struct gfHit **)va); 01116 const struct gfHit *b = *((struct gfHit **)vb); 01117 01118 if (a->tStart > b->tStart) 01119 return 1; 01120 else if (a->tStart == b->tStart) 01121 return 0; 01122 else 01123 return -1; 01124 }
Here is the caller graph for this function:

| static void gfHitSort2 | ( | struct gfHit ** | ptArray, | |
| int | n | |||
| ) | [static] |
Definition at line 1001 of file genoFind.c.
References gfHit::diagonal, nosSwap, and nosTemp.
Referenced by gfHitSortDiagonal().
01005 { 01006 struct gfHit **tmp, **pt1, **pt2; 01007 int n1, n2; 01008 01009 /* Divide area to sort in two. */ 01010 n1 = (n>>1); 01011 n2 = n - n1; 01012 pt1 = ptArray; 01013 pt2 = ptArray + n1; 01014 01015 /* Sort each area separately. Handle small case (2 or less elements) 01016 * here. Otherwise recurse to sort. */ 01017 if (n1 > 2) 01018 gfHitSort2(pt1, n1); 01019 else if (n1 == 2 && pt1[0]->diagonal > pt1[1]->diagonal) 01020 { 01021 nosSwap = pt1[1]; 01022 pt1[1] = pt1[0]; 01023 pt1[0] = nosSwap; 01024 } 01025 if (n2 > 2) 01026 gfHitSort2(pt2, n2); 01027 else if (n2 == 2 && pt2[0]->diagonal > pt2[1]->diagonal) 01028 { 01029 nosSwap = pt2[1]; 01030 pt2[1] = pt2[0]; 01031 pt2[0] = nosSwap; 01032 } 01033 01034 /* At this point both halves are internally sorted. 01035 * Do a merge-sort between two halves copying to temp 01036 * buffer. Then copy back sorted result to main buffer. */ 01037 tmp = nosTemp; 01038 while (n1 > 0 && n2 > 0) 01039 { 01040 if ((*pt1)->diagonal <= (*pt2)->diagonal) 01041 { 01042 --n1; 01043 *tmp++ = *pt1++; 01044 } 01045 else 01046 { 01047 --n2; 01048 *tmp++ = *pt2++; 01049 } 01050 } 01051 /* One or both sides are now fully merged. */ 01052 assert(tmp + n1 + n2 == nosTemp + n); 01053 01054 /* If some of first side left to merge copy it to end of temp buf. */ 01055 if (n1 > 0) 01056 memcpy(tmp, pt1, n1 * sizeof(*tmp)); 01057 01058 /* If some of second side left to merge, we finesse it here: 01059 * simply refrain from copying over it as we copy back temp buf. */ 01060 memcpy(ptArray, nosTemp, (n - n2) * sizeof(*ptArray)); 01061 }
Here is the caller graph for this function:

| void gfHitSortDiagonal | ( | struct gfHit ** | pList | ) |
Definition at line 1063 of file genoFind.c.
References freez(), gfHitSort2(), needLargeMem(), gfHit::next, nosTemp, slCount(), and slReverse().
Referenced by clumpHits().
01065 { 01066 struct gfHit *list = *pList; 01067 if (list != NULL && list->next != NULL) 01068 { 01069 int count = slCount(list); 01070 struct gfHit *el; 01071 struct gfHit **array; 01072 int i; 01073 int byteSize = count*sizeof(*array); 01074 array = needLargeMem(byteSize); 01075 nosTemp = needLargeMem(byteSize); 01076 for (el = list, i=0; el != NULL; el = el->next, i++) 01077 array[i] = el; 01078 gfHitSort2(array, count); 01079 list = NULL; 01080 for (i=0; i<count; ++i) 01081 { 01082 array[i]->next = list; 01083 list = array[i]; 01084 } 01085 freez(&array); 01086 freez(&nosTemp); 01087 slReverse(&list); 01088 *pList = list; 01089 } 01090 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct genoFind* gfIndexNibsAndTwoBits | ( | int | fileCount, | |
| char * | fileNames[], | |||
| int | minMatch, | |||
| int | maxGap, | |||
| int | tileSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | allowOneMismatch, | |||
| int | stepSize | |||
| ) | [read] |
Definition at line 514 of file genoFind.c.
References AllocArray, baseCount, bits32, cloneString(), dnaSeqFree, errAbort(), FALSE, gfAddSeq(), gfAddTilesInNib(), gfAllocLists(), gfCountTilesInNib(), gfCountTilesInTwoBit(), gfNewEmpty(), gfZeroNonOverused(), gfZeroOverused(), twoBitFile::indexList, maxTotalBases(), twoBitIndex::name, twoBitIndex::next, nibIsFile(), PATH_LEN, safef(), dnaSeq::size, genoFind::sourceCount, genoFind::sources, ss, genoFind::tileSize, genoFind::totalSeqSize, twoBitClose(), twoBitIsFile(), twoBitOpen(), and twoBitReadSeqFragLower().
Referenced by genoFindDirect(), genoPcrDirect(), and startServer().
00525 { 00526 struct genoFind *gf = gfNewEmpty(minMatch, maxGap, tileSize, stepSize, 00527 maxPat, oocFile, FALSE, allowOneMismatch); 00528 int i; 00529 bits32 offset = 0, nibSize; 00530 char *fileName; 00531 struct gfSeqSource *ss; 00532 long long totalBases = 0, warnAt = maxTotalBases(); 00533 int totalSeq = 0; 00534 00535 if (allowOneMismatch) 00536 errAbort("Don't currently support allowOneMismatch in gfIndexNibsAndTwoBits"); 00537 if (stepSize == 0) 00538 stepSize = gf->tileSize; 00539 for (i=0; i<fileCount; ++i) 00540 { 00541 fileName = fileNames[i]; 00542 if (twoBitIsFile(fileName)) 00543 { 00544 int seqCount; 00545 long long baseCount; 00546 gfCountTilesInTwoBit(gf, stepSize, fileName, &seqCount, &baseCount); 00547 totalBases += baseCount; 00548 totalSeq += seqCount; 00549 } 00550 else if (nibIsFile(fileName)) 00551 { 00552 totalBases += gfCountTilesInNib(gf, stepSize, fileName); 00553 totalSeq += 1; 00554 } 00555 else 00556 errAbort("Unrecognized file type %s", fileName); 00557 /* Warn if they exceed 4 gig. */ 00558 if (totalBases >= warnAt) 00559 errAbort("Exceeding 4 billion bases, sorry gfServer can't handle that."); 00560 } 00561 gfAllocLists(gf); 00562 gfZeroNonOverused(gf); 00563 AllocArray(gf->sources, totalSeq); 00564 gf->sourceCount = totalSeq; 00565 ss = gf->sources; 00566 for (i=0; i<fileCount; ++i) 00567 { 00568 fileName = fileNames[i]; 00569 if (nibIsFile(fileName)) 00570 { 00571 nibSize = gfAddTilesInNib(gf, fileName, offset, stepSize); 00572 ss->fileName = fileName; 00573 ss->start = offset; 00574 offset += nibSize; 00575 ss->end = offset; 00576 ++ss; 00577 } 00578 else 00579 { 00580 struct twoBitFile *tbf = twoBitOpen(fileName); 00581 struct twoBitIndex *index; 00582 char nameBuf[PATH_LEN+256]; 00583 for (index = tbf->indexList; index != NULL; index = index->next) 00584 { 00585 struct dnaSeq *seq = twoBitReadSeqFragLower(tbf, index->name, 0,0); 00586 gfAddSeq(gf, seq, offset); 00587 safef(nameBuf, sizeof(nameBuf), "%s:%s", fileName, index->name); 00588 ss->fileName = cloneString(nameBuf); 00589 ss->start = offset; 00590 offset += seq->size; 00591 ss->end = offset; 00592 ++ss; 00593 dnaSeqFree(&seq); 00594 } 00595 twoBitClose(&tbf); 00596 } 00597 } 00598 gf->totalSeqSize = offset; 00599 gfZeroOverused(gf); 00600 printf("Done adding\n"); 00601 return gf; 00602 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct genoFind* gfIndexSeq | ( | bioSeq * | seqList, | |
| int | minMatch, | |||
| int | maxGap, | |||
| int | tileSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | isPep, | |||
| boolean | allowOneMismatch, | |||
| boolean | maskUpper, | |||
| int | stepSize | |||
| ) | [read] |
Definition at line 907 of file genoFind.c.
References gfLargeIndexSeq(), gfNewEmpty(), gfSmallIndexSeq(), and genoFind::segSize.
Referenced by bigBlat(), and blat().
00913 { 00914 struct genoFind *gf = gfNewEmpty(minMatch, maxGap, tileSize, stepSize, maxPat, 00915 oocFile, isPep, allowOneMismatch); 00916 if (stepSize == 0) 00917 stepSize = tileSize; 00918 if (gf->segSize > 0) 00919 { 00920 gfLargeIndexSeq(gf, seqList, minMatch, maxGap, tileSize, maxPat, oocFile, isPep, maskUpper); 00921 } 00922 else 00923 { 00924 gfSmallIndexSeq(gf, seqList, minMatch, maxGap, tileSize, maxPat, oocFile, isPep, maskUpper); 00925 } 00926 return gf; 00927 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfIndexTransNibsAndTwoBits | ( | struct genoFind * | transGf[2][3], | |
| int | fileCount, | |||
| char * | fileNames[], | |||
| int | minMatch, | |||
| int | maxGap, | |||
| int | tileSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | allowOneMismatch, | |||
| boolean | doMask, | |||
| int | stepSize | |||
| ) |
Definition at line 717 of file genoFind.c.
References AllocArray, bits32, errAbort(), freeDnaSeq(), gfAllocLists(), gfNewEmpty(), gfZeroNonOverused(), gfZeroOverused(), twoBitFile::indexList, maskSimplePepRepeat(), maxTotalBases(), twoBitIndex::name, twoBitIndex::next, nibIsFile(), PATH_LEN, readMaskedNib(), readMaskedTwoBit(), safef(), dnaSeq::size, genoFind::totalSeqSize, transCountBothStrands(), transIndexBothStrands(), TRUE, twoBitCheckTotalSize(), twoBitClose(), twoBitIsFile(), and twoBitOpen().
Referenced by startServer().
00722 { 00723 struct genoFind *gf; 00724 int i,isRc, frame; 00725 bits32 offset[2][3]; 00726 char *fileName; 00727 struct dnaSeq *seq; 00728 int sourceCount = 0; 00729 long long totalBases = 0, warnAt = maxTotalBases(); 00730 00731 if (allowOneMismatch) 00732 errAbort("Don't currently support allowOneMismatch in gfIndexTransNibsAndTwoBits"); 00733 /* Allocate indices for all reading frames. */ 00734 for (isRc=0; isRc <= 1; ++isRc) 00735 { 00736 for (frame = 0; frame < 3; ++frame) 00737 { 00738 transGf[isRc][frame] = gf = gfNewEmpty(minMatch, maxGap, 00739 tileSize, stepSize, maxPat, oocFile, TRUE, allowOneMismatch); 00740 } 00741 } 00742 00743 /* Mask simple AA repeats (of period 1 and 2). */ 00744 for (isRc = 0; isRc <= 1; ++isRc) 00745 for (frame = 0; frame < 3; ++frame) 00746 maskSimplePepRepeat(transGf[isRc][frame]); 00747 00748 /* Scan through .nib and .2bit files once counting tiles. */ 00749 for (i=0; i<fileCount; ++i) 00750 { 00751 fileName = fileNames[i]; 00752 printf("Counting %s\n", fileName); 00753 if (nibIsFile(fileName)) 00754 { 00755 seq = readMaskedNib(fileName, doMask); 00756 transCountBothStrands(seq, transGf); 00757 sourceCount += 1; 00758 totalBases += seq->size; 00759 freeDnaSeq(&seq); 00760 } 00761 else if (twoBitIsFile(fileName)) 00762 { 00763 struct twoBitFile *tbf = twoBitOpen(fileName); 00764 struct twoBitIndex *index; 00765 totalBases += twoBitCheckTotalSize(tbf); 00766 00767 for (index = tbf->indexList; index != NULL; index = index->next) 00768 { 00769 seq = readMaskedTwoBit(tbf, index->name, doMask); 00770 transCountBothStrands(seq, transGf); 00771 sourceCount += 1; 00772 freeDnaSeq(&seq); 00773 } 00774 twoBitClose(&tbf); 00775 } 00776 else 00777 errAbort("Unrecognized file type %s", fileName); 00778 if (totalBases >= warnAt) 00779 errAbort("Exceeding 4 billion bases, sorry gfServer can't handle that."); 00780 } 00781 00782 /* Get space for entries in indexed of all reading frames. */ 00783 for (isRc=0; isRc <= 1; ++isRc) 00784 { 00785 for (frame = 0; frame < 3; ++frame) 00786 { 00787 gf = transGf[isRc][frame]; 00788 gfAllocLists(gf); 00789 gfZeroNonOverused(gf); 00790 AllocArray(gf->sources, sourceCount); 00791 gf->sourceCount = sourceCount; 00792 offset[isRc][frame] = 0; 00793 } 00794 } 00795 00796 /* Scan through nibs a second time building index. */ 00797 sourceCount = 0; 00798 for (i=0; i<fileCount; ++i) 00799 { 00800 fileName = fileNames[i]; 00801 printf("Indexing %s\n", fileName); 00802 if (nibIsFile(fileName)) 00803 { 00804 seq = readMaskedNib(fileName, doMask); 00805 transIndexBothStrands(seq, transGf, offset, sourceCount, fileName); 00806 freeDnaSeq(&seq); 00807 sourceCount += 1; 00808 } 00809 else /* .2bit file */ 00810 { 00811 struct twoBitFile *tbf = twoBitOpen(fileName); 00812 struct twoBitIndex *index; 00813 for (index = tbf->indexList; index != NULL; index = index->next) 00814 { 00815 char nameBuf[PATH_LEN+256]; 00816 safef(nameBuf, sizeof(nameBuf), "%s:%s", fileName, index->name); 00817 seq = readMaskedTwoBit(tbf, index->name, doMask); 00818 transIndexBothStrands(seq, transGf, offset, sourceCount, nameBuf); 00819 sourceCount += 1; 00820 freeDnaSeq(&seq); 00821 } 00822 twoBitClose(&tbf); 00823 } 00824 } 00825 00826 for (isRc=0; isRc <= 1; ++isRc) 00827 { 00828 for (frame = 0; frame < 3; ++frame) 00829 { 00830 gf = transGf[isRc][frame]; 00831 gf->totalSeqSize = offset[isRc][frame]; 00832 gfZeroOverused(gf); 00833 } 00834 } 00835 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct genoFind* gfLargeIndexSeq | ( | struct genoFind * | gf, | |
| bioSeq * | seqList, | |||
| int | minMatch, | |||
| int | maxGap, | |||
| int | tileSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | isPep, | |||
| boolean | maskUpper | |||
| ) | [static, read] |
Definition at line 873 of file genoFind.c.
References AllocArray, bits32, gfAddLargeSeq(), gfAllocLargeLists(), gfCountSeq(), gfZeroNonOverused(), gfZeroOverused(), maskFromUpperCaseSeq(), dnaSeq::next, gfSeqSource::seq, slCount(), and ss.
Referenced by gfIndexSeq().
00877 { 00878 int seqCount = slCount(seqList); 00879 bioSeq *seq; 00880 int i; 00881 bits32 offset = 0; 00882 struct gfSeqSource *ss; 00883 00884 for (seq = seqList; seq != NULL; seq = seq->next) 00885 gfCountSeq(gf, seq); 00886 gfAllocLargeLists(gf); 00887 gfZeroNonOverused(gf); 00888 AllocArray(gf->sources, seqCount); 00889 gf->sourceCount = seqCount; 00890 for (i=0, seq = seqList; i<seqCount; ++i, seq = seq->next) 00891 { 00892 gfAddLargeSeq(gf, seq, offset); 00893 ss = gf->sources+i; 00894 ss->seq = seq; 00895 ss->start = offset; 00896 offset += seq->size; 00897 ss->end = offset; 00898 if (maskUpper) 00899 ss->maskedBits = maskFromUpperCaseSeq(seq); 00900 } 00901 gf->totalSeqSize = offset; 00902 gfZeroOverused(gf); 00903 return gf; 00904 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfMakeOoc | ( | char * | outName, | |
| char * | files[], | |||
| int | fileCount, | |||
| int | tileSize, | |||
| bits32 | maxPat, | |||
| enum gfType | tType | |||
| ) |
Definition at line 1928 of file genoFind.c.
References bits32, carefulClose(), dnaSeq::dna, errAbort(), FALSE, faReadAllSeq(), freeDnaSeqList(), genoFindFree(), gfCountSeq(), gfMaxGap, gfMinMatch, gfNewEmpty(), gftDnaX, gftProt, gftRnaX, genoFind::listSizes, mustOpen(), dnaSeq::next, nibIsFile(), nibLoadAll(), oocSig, reverseComplement(), genoFind::segSize, dnaSeq::size, genoFind::tileSpaceSize, toLowerN(), trans3Free(), trans3New(), twoBitIsFile(), twoBitLoadAll(), and writeOne.
Referenced by blat().
01931 { 01932 boolean dbIsPep = (tType == gftProt || tType == gftDnaX || tType == gftRnaX); 01933 struct genoFind *gf = gfNewEmpty(gfMinMatch, gfMaxGap, tileSize, tileSize, 01934 maxPat, NULL, dbIsPep, FALSE); 01935 bits32 *sizes = gf->listSizes; 01936 int tileSpaceSize = gf->tileSpaceSize; 01937 bioSeq *seq, *seqList; 01938 bits32 sig = oocSig, psz = tileSize; 01939 bits32 i; 01940 int oocCount = 0; 01941 char *inName; 01942 FILE *f = mustOpen(outName, "w"); 01943 01944 if (gf->segSize > 0) 01945 errAbort("Don't yet know how to make ooc files for large tile sizes."); 01946 for (i=0; i<fileCount; ++i) 01947 { 01948 inName = files[i]; 01949 printf("Loading %s\n", inName); 01950 if (nibIsFile(inName)) 01951 { 01952 seqList = nibLoadAll(inName); 01953 } 01954 else if (twoBitIsFile(inName)) 01955 { 01956 seqList = twoBitLoadAll(inName); 01957 for (seq = seqList; seq != NULL; seq = seq->next) 01958 toLowerN(seq->dna, seq->size); 01959 } 01960 else 01961 { 01962 seqList = faReadAllSeq(inName, tType != gftProt); 01963 } 01964 printf("Counting %s\n", inName); 01965 for (seq = seqList; seq != NULL; seq = seq->next) 01966 { 01967 int isRc; 01968 for (isRc = 0; isRc <= 1; ++isRc) 01969 { 01970 if (tType == gftDnaX || tType == gftRnaX) 01971 { 01972 struct trans3 *t3 = trans3New(seq); 01973 int frame; 01974 for (frame=0; frame<3; ++frame) 01975 { 01976 gfCountSeq(gf, t3->trans[frame]); 01977 } 01978 trans3Free(&t3); 01979 } 01980 else 01981 { 01982 gfCountSeq(gf, seq); 01983 } 01984 if (tType == gftProt || tType == gftRnaX) 01985 break; 01986 else 01987 { 01988 reverseComplement(seq->dna, seq->size); 01989 } 01990 } 01991 } 01992 freeDnaSeqList(&seqList); 01993 } 01994 printf("Writing %s\n", outName); 01995 writeOne(f, sig); 01996 writeOne(f, psz); 01997 for (i=0; i<tileSpaceSize; ++i) 01998 { 01999 if (sizes[i] >= maxPat) 02000 { 02001 writeOne(f, i); 02002 ++oocCount; 02003 } 02004 } 02005 carefulClose(&f); 02006 genoFindFree(&gf); 02007 printf("Wrote %d overused %d-mers to %s\n", oocCount, tileSize, outName); 02008 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct genoFind* gfNewEmpty | ( | int | minMatch, | |
| int | maxGap, | |||
| int | tileSize, | |||
| int | stepSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | isPep, | |||
| boolean | allowOneMismatch | |||
| ) | [static, read] |
Definition at line 112 of file genoFind.c.
References AllocVar, genoFind::allowOneMismatch, BIGNUM, genoFind::endLists, errAbort(), gfCheckTileSize(), gfPowerOf20(), genoFind::isPep, genoFind::lists, genoFind::listSizes, genoFind::maxGap, genoFind::maxPat, genoFind::minMatch, needHugeZeroedMem(), oocMaskCounts(), oocMaskSimpleRepeats(), genoFind::segSize, genoFind::stepSize, genoFind::tileMask, genoFind::tileSize, and genoFind::tileSpaceSize.
Referenced by gfIndexNibsAndTwoBits(), gfIndexSeq(), gfIndexTransNibsAndTwoBits(), and gfMakeOoc().
00116 { 00117 struct genoFind *gf; 00118 int tileSpaceSize; 00119 int segSize = 0; 00120 00121 gfCheckTileSize(tileSize, isPep); 00122 if (stepSize == 0) 00123 stepSize = tileSize; 00124 AllocVar(gf); 00125 if (isPep) 00126 { 00127 if (tileSize > 5) 00128 { 00129 segSize = tileSize - 5; 00130 } 00131 tileSpaceSize = gf->tileSpaceSize = gfPowerOf20(tileSize-segSize); 00132 } 00133 else 00134 { 00135 int seedBitSize; 00136 if (tileSize > 12) 00137 { 00138 segSize = tileSize - 12; 00139 } 00140 seedBitSize = (tileSize-segSize)*2; 00141 tileSpaceSize = gf->tileSpaceSize = (1<<seedBitSize); 00142 gf->tileMask = tileSpaceSize-1; 00143 } 00144 gf->segSize = segSize; 00145 gf->tileSize = tileSize; 00146 gf->stepSize = stepSize; 00147 gf->isPep = isPep; 00148 gf->allowOneMismatch = allowOneMismatch; 00149 if (segSize > 0) 00150 { 00151 gf->endLists = needHugeZeroedMem(tileSpaceSize * sizeof(gf->endLists[0])); 00152 maxPat = BIGNUM; /* Don't filter out overused on the big ones. It is 00153 * unnecessary and would be quite complex. */ 00154 } 00155 else 00156 { 00157 gf->lists = needHugeZeroedMem(tileSpaceSize * sizeof(gf->lists[0])); 00158 } 00159 gf->listSizes = needHugeZeroedMem(tileSpaceSize * sizeof(gf->listSizes[0])); 00160 gf->minMatch = minMatch; 00161 gf->maxGap = maxGap; 00162 gf->maxPat = maxPat; 00163 if (oocFile != NULL) 00164 { 00165 if (segSize > 0) 00166 errAbort("Don't yet support ooc on large tile sizes"); 00167 oocMaskCounts(oocFile, gf->listSizes, tileSize, maxPat); 00168 oocMaskSimpleRepeats(gf->listSizes, tileSize, maxPat); 00169 } 00170 return gf; 00171 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gfClump* gfPcrClumps | ( | struct genoFind * | gf, | |
| char * | fPrimer, | |||
| int | fPrimerSize, | |||
| char * | rPrimer, | |||
| int | rPrimerSize, | |||
| int | minDistance, | |||
| int | maxDistance | |||
| ) | [read] |
Definition at line 2155 of file genoFind.c.
References errAbort(), genoFind::isPep, pcrClumps(), reverseComplement(), genoFind::segSize, and tolowers().
Referenced by genoPcrDirect(), and pcrQuery().
02158 { 02159 struct gfClump *clumpList; 02160 if (gf->segSize > 0) 02161 errAbort("Can't do PCR on large tile sizes"); 02162 if (gf->isPep) 02163 errAbort("Can't do PCR on protein/translated index"); 02164 tolowers(fPrimer); 02165 tolowers(rPrimer); 02166 reverseComplement(rPrimer, rPrimerSize); 02167 clumpList = pcrClumps(gf, fPrimer, fPrimerSize, rPrimer, rPrimerSize, 02168 minDistance, maxDistance); 02169 reverseComplement(rPrimer, rPrimerSize); 02170 return clumpList; 02171 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int gfPepTile | ( | AA * | pep, | |
| int | n | |||
| ) |
Definition at line 206 of file genoFind.c.
References aaVal.
Referenced by gfAddLargeSeq(), gfAddSeq(), gfCountSeq(), gfSegmentedFindHits(), gfSegmentedFindNearHits(), gfStraightFindHits(), and gfStraightFindNearHits().
00208 { 00209 int tile = 0; 00210 int aa; 00211 while (--n >= 0) 00212 { 00213 tile *= 20; 00214 aa = aaVal[(int)(*pep++)]; 00215 if (aa < 0) 00216 return -1; 00217 tile += aa; 00218 } 00219 return tile; 00220 }
Here is the caller graph for this function:

| static void gfPipeHandler | ( | int | sigNum | ) | [static] |
Definition at line 32 of file genoFind.c.
References pipeBroke, and TRUE.
Referenced by gfCatchPipes().
Here is the caller graph for this function:

| int gfPowerOf20 | ( | int | n | ) |
Definition at line 88 of file genoFind.c.
Referenced by gfNewEmpty().
Here is the caller graph for this function:

| int gfReadMulti | ( | int | sd, | |
| void * | vBuf, | |||
| size_t | size | |||
| ) |
Definition at line 44 of file genoFind.c.
Referenced by startServer().
00046 { 00047 char *buf = vBuf; 00048 size_t totalRead = 0; 00049 int oneRead; 00050 00051 while (totalRead < size) 00052 { 00053 oneRead = read(sd, buf + totalRead, size - totalRead); 00054 if (oneRead < 0) 00055 { 00056 perror("Couldn't finish large read"); 00057 return oneRead; 00058 } 00059 else if (oneRead == 0) 00060 /* Avoid an infinite loop when the client closed the socket. */ 00061 break; 00062 totalRead += oneRead; 00063 } 00064 return totalRead; 00065 }
Here is the caller graph for this function:

| static struct gfHit* gfSegmentedFindHits | ( | struct genoFind * | gf, | |
| aaSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| int * | retHitCount, | |||
| struct gfSeqSource * | target, | |||
| int | tMin, | |||
| int | tMax | |||
| ) | [static, read] |
Definition at line 1632 of file genoFind.c.
References bitCountRange(), bits16, bits32, dnaSeq::dna, genoFind::endLists, findSource(), gfDnaTile(), gfPepTile(), initNtLookup(), genoFind::isPep, genoFind::listSizes, lm, lmAllocVar, gfHit::qStart, genoFind::segSize, dnaSeq::size, slAddHead, genoFind::tileSize, tileSize, and gfHit::tStart.
Referenced by gfFindHitsWithQmask().
01637 { 01638 struct gfHit *hitList = NULL, *hit; 01639 int size = seq->size; 01640 int tileSize = gf->tileSize; 01641 int tileTailSize = gf->segSize; 01642 int tileHeadSize = gf->tileSize - tileTailSize; 01643 int lastStart = size - tileSize; 01644 char *poly = seq->dna; 01645 int i, j; 01646 int tileHead, tileTail; 01647 int listSize; 01648 bits32 qStart; 01649 bits16 *endList; 01650 int hitCount = 0; 01651 int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); 01652 01653 01654 initNtLookup(); 01655 for (i=0; i<=lastStart; ++i) 01656 { 01657 if (qMaskBits == NULL || bitCountRange(qMaskBits, i+qMaskOffset, tileSize) == 0) 01658 { 01659 tileHead = makeTile(poly+i, tileHeadSize); 01660 if (tileHead < 0) 01661 continue; 01662 tileTail = makeTile(poly+i+tileHeadSize, tileTailSize); 01663 if (tileTail < 0) 01664 continue; 01665 listSize = gf->listSizes[tileHead]; 01666 qStart = i; 01667 endList = gf->endLists[tileHead]; 01668 for (j=0; j<listSize; ++j) 01669 { 01670 if (endList[0] == tileTail) 01671 { 01672 int tStart = (endList[1]<<16) + endList[2]; 01673 if (target == NULL || 01674 (target == findSource(gf, tStart) 01675 && tStart >= tMin && tStart < tMax) ) 01676 { 01677 lmAllocVar(lm,hit); 01678 hit->qStart = qStart; 01679 hit->tStart = tStart; 01680 hit->diagonal = tStart + size - qStart; 01681 slAddHead(&hitList, hit); 01682 ++hitCount; 01683 } 01684 } 01685 endList += 3; 01686 } 01687 } 01688 } 01689 *retHitCount = hitCount; 01690 return hitList; 01691 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct gfHit* gfSegmentedFindNearHits | ( | struct genoFind * | gf, | |
| aaSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| int * | retHitCount, | |||
| struct gfSeqSource * | target, | |||
| int | tMin, | |||
| int | tMax | |||
| ) | [static, read] |
Definition at line 1693 of file genoFind.c.
References aaVal, bitCountRange(), bits16, bits32, dnaSeq::dna, genoFind::endLists, findSource(), gfDnaTile(), gfPepTile(), initNtLookup(), genoFind::isPep, genoFind::listSizes, lm, lmAllocVar, ntVal, gfHit::qStart, genoFind::segSize, dnaSeq::size, slAddHead, genoFind::tileSize, tileSize, and gfHit::tStart.
Referenced by gfFindHitsWithQmask().
01698 { 01699 struct gfHit *hitList = NULL, *hit; 01700 int size = seq->size; 01701 int tileSize = gf->tileSize; 01702 int tileTailSize = gf->segSize; 01703 int tileHeadSize = gf->tileSize - tileTailSize; 01704 int lastStart = size - tileSize; 01705 char *poly = seq->dna; 01706 int i, j; 01707 int tileHead, tileTail; 01708 int listSize; 01709 bits32 qStart; 01710 bits16 *endList; 01711 int hitCount = 0; 01712 int varPos, varVal; /* Variable position. */ 01713 int (*makeTile)(char *poly, int n); 01714 int alphabetSize; 01715 char oldChar, zeroChar; 01716 int headPosMul, tailPosMul, avoid; 01717 boolean modTail; 01718 int *seqValLookup; 01719 01720 01721 initNtLookup(); 01722 if (gf->isPep) 01723 { 01724 makeTile = gfPepTile; 01725 alphabetSize = 20; 01726 zeroChar = 'A'; 01727 seqValLookup = aaVal; 01728 } 01729 else 01730 { 01731 makeTile = gfDnaTile; 01732 alphabetSize = 4; 01733 zeroChar = 't'; 01734 seqValLookup = ntVal; 01735 } 01736 01737 for (i=0; i<=lastStart; ++i) 01738 { 01739 if (qMaskBits == NULL || bitCountRange(qMaskBits, i+qMaskOffset, tileSize) == 0) 01740 { 01741 headPosMul = tailPosMul = 1; 01742 for (varPos = tileSize-1; varPos >= 0; --varPos) 01743 { 01744 /* Make a tile that has zero value at variable position. */ 01745 modTail = (varPos >= tileHeadSize); 01746 oldChar = poly[i+varPos]; 01747 poly[i+varPos] = zeroChar; 01748 tileHead = makeTile(poly+i, tileHeadSize); 01749 tileTail = makeTile(poly+i+tileHeadSize, tileTailSize); 01750 poly[i+varPos] = oldChar; 01751 01752 /* Avoid checking the unmodified tile multiple times. */ 01753 if (varPos == 0) 01754 avoid = -1; 01755 else 01756 avoid = seqValLookup[(int)oldChar]; 01757 01758 if (tileHead >= 0 && tileTail >= 0) 01759 { 01760 for (varVal=0; varVal<alphabetSize; ++varVal) 01761 { 01762 if (varVal != avoid) 01763 { 01764 listSize = gf->listSizes[tileHead]; 01765 qStart = i; 01766 endList = gf->endLists[tileHead]; 01767 for (j=0; j<listSize; ++j) 01768 { 01769 if (endList[0] == tileTail) 01770 { 01771 int tStart = (endList[1]<<16) + endList[2]; 01772 if (target == NULL || 01773 (target == findSource(gf, tStart) 01774 && tStart >= tMin && tStart < tMax) ) 01775 { 01776 lmAllocVar(lm,hit); 01777 hit->qStart = qStart; 01778 hit->tStart = tStart; 01779 hit->diagonal = tStart + size - qStart; 01780 slAddHead(&hitList, hit); 01781 ++hitCount; 01782 } 01783 } 01784 endList += 3; 01785 } 01786 } 01787 if (modTail) 01788 tileTail += tailPosMul; 01789 else 01790 tileHead += headPosMul; 01791 } 01792 } 01793 if (modTail) 01794 tailPosMul *= alphabetSize; 01795 else 01796 headPosMul *= alphabetSize; 01797 } 01798 } 01799 } 01800 *retHitCount = hitCount; 01801 return hitList; 01802 }
Here is the call graph for this function:

Here is the caller graph for this function:

| char* gfSignature | ( | ) |
Definition at line 22 of file genoFind.c.
Referenced by getFileList(), gfPcrGetRanges(), pcrServer(), queryServer(), startSeqQuery(), startServer(), statusServer(), and stopServer().
Here is the caller graph for this function:

| static struct genoFind* gfSmallIndexSeq | ( | struct genoFind * | gf, | |
| bioSeq * | seqList, | |||
| int | minMatch, | |||
| int | maxGap, | |||
| int | tileSize, | |||
| int | maxPat, | |||
| char * | oocFile, | |||
| boolean | isPep, | |||
| boolean | maskUpper | |||
| ) | [static, read] |
Definition at line 837 of file genoFind.c.
References AllocArray, bits32, gfAddSeq(), gfAllocLists(), gfCountSeq(), gfZeroNonOverused(), gfZeroOverused(), maskFromUpperCaseSeq(), maskSimplePepRepeat(), dnaSeq::next, gfSeqSource::seq, slCount(), and ss.
Referenced by gfIndexSeq().
00841 { 00842 int seqCount = slCount(seqList); 00843 bioSeq *seq; 00844 int i; 00845 bits32 offset = 0; 00846 struct gfSeqSource *ss; 00847 00848 if (isPep) 00849 maskSimplePepRepeat(gf); 00850 for (seq = seqList; seq != NULL; seq = seq->next) 00851 gfCountSeq(gf, seq); 00852 gfAllocLists(gf); 00853 gfZeroNonOverused(gf); 00854 if (seqCount > 0) 00855 AllocArray(gf->sources, seqCount); 00856 gf->sourceCount = seqCount; 00857 for (i=0, seq = seqList; i<seqCount; ++i, seq = seq->next) 00858 { 00859 gfAddSeq(gf, seq, offset); 00860 ss = gf->sources+i; 00861 ss->seq = seq; 00862 ss->start = offset; 00863 offset += seq->size; 00864 ss->end = offset; 00865 if (maskUpper) 00866 ss->maskedBits = maskFromUpperCaseSeq(seq); 00867 } 00868 gf->totalSeqSize = offset; 00869 gfZeroOverused(gf); 00870 return gf; 00871 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct gfHit* gfStraightFindHits | ( | struct genoFind * | gf, | |
| aaSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| int * | retHitCount, | |||
| struct gfSeqSource * | target, | |||
| int | tMin, | |||
| int | tMax | |||
| ) | [static, read] |
Definition at line 1482 of file genoFind.c.
References bitCountRange(), bits32, dnaSeq::dna, findSource(), gfDnaTile(), gfPepTile(), initNtLookup(), genoFind::isPep, genoFind::lists, genoFind::listSizes, lm, lmAllocVar, gfHit::qStart, dnaSeq::size, slAddHead, genoFind::tileSize, tileSize, and gfHit::tStart.
Referenced by gfFindHitsWithQmask().
01487 { 01488 struct gfHit *hitList = NULL, *hit; 01489 int size = seq->size; 01490 int tileSize = gf->tileSize; 01491 int lastStart = size - tileSize; 01492 char *poly = seq->dna; 01493 int i, j; 01494 int tile; 01495 int listSize; 01496 bits32 qStart, *tList; 01497 int hitCount = 0; 01498 int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); 01499 01500 initNtLookup(); 01501 for (i=0; i<=lastStart; ++i) 01502 { 01503 tile = makeTile(poly+i, tileSize); 01504 if (tile < 0) 01505 continue; 01506 listSize = gf->listSizes[tile]; 01507 if (listSize > 0) 01508 { 01509 qStart = i; 01510 if (qMaskBits == NULL || bitCountRange(qMaskBits, qStart+qMaskOffset, tileSize) == 0) 01511 { 01512 tList = gf->lists[tile]; 01513 for (j=0; j<listSize; ++j) 01514 { 01515 int tStart = tList[j]; 01516 if (target == NULL || 01517 (target == findSource(gf, tStart) && tStart >= tMin && tStart < tMax) ) 01518 { 01519 lmAllocVar(lm,hit); 01520 hit->qStart = qStart; 01521 hit->tStart = tStart; 01522 hit->diagonal = tStart + size - qStart; 01523 slAddHead(&hitList, hit); 01524 ++hitCount; 01525 } 01526 } 01527 } 01528 } 01529 } 01530 *retHitCount = hitCount; 01531 return hitList; 01532 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct gfHit* gfStraightFindNearHits | ( | struct genoFind * | gf, | |
| aaSeq * | seq, | |||
| Bits * | qMaskBits, | |||
| int | qMaskOffset, | |||
| struct lm * | lm, | |||
| int * | retHitCount, | |||
| struct gfSeqSource * | target, | |||
| int | tMin, | |||
| int | tMax | |||
| ) | [static, read] |
Definition at line 1534 of file genoFind.c.
References aaVal, bitCountRange(), bits32, dnaSeq::dna, findSource(), gfDnaTile(), gfPepTile(), initNtLookup(), genoFind::isPep, genoFind::lists, genoFind::listSizes, lm, lmAllocVar, ntVal, gfHit::qStart, dnaSeq::size, slAddHead, genoFind::tileSize, tileSize, and gfHit::tStart.
Referenced by gfFindHitsWithQmask().
01539 { 01540 struct gfHit *hitList = NULL, *hit; 01541 int size = seq->size; 01542 int tileSize = gf->tileSize; 01543 int lastStart = size - tileSize; 01544 char *poly = seq->dna; 01545 int i, j; 01546 int tile; 01547 int listSize; 01548 bits32 qStart, *tList; 01549 int hitCount = 0; 01550 int varPos, varVal; /* Variable position. */ 01551 int (*makeTile)(char *poly, int n); 01552 int alphabetSize; 01553 char oldChar, zeroChar; 01554 int *seqValLookup; 01555 int posMul, avoid; 01556 01557 initNtLookup(); 01558 if (gf->isPep) 01559 { 01560 makeTile = gfPepTile; 01561 alphabetSize = 20; 01562 zeroChar = 'A'; 01563 seqValLookup = aaVal; 01564 } 01565 else 01566 { 01567 makeTile = gfDnaTile; 01568 alphabetSize = 4; 01569 zeroChar = 't'; 01570 seqValLookup = ntVal; 01571 } 01572 01573 for (i=0; i<=lastStart; ++i) 01574 { 01575 posMul = 1; 01576 for (varPos = tileSize-1; varPos >=0; --varPos) 01577 { 01578 /* Make a tile that has zero value at variable position. */ 01579 oldChar = poly[i+varPos]; 01580 poly[i+varPos] = zeroChar; 01581 tile = makeTile(poly+i, tileSize); 01582 poly[i+varPos] = oldChar; 01583 01584 /* Avoid checking the unmodified tile multiple times. */ 01585 if (varPos == 0) 01586 avoid = -1; 01587 else 01588 avoid = seqValLookup[(int)oldChar]; 01589 01590 if (tile >= 0) 01591 { 01592 /* Look up all possible values of variable position. */ 01593 for (varVal=0; varVal<alphabetSize; ++varVal) 01594 { 01595 if (varVal != avoid) 01596 { 01597 listSize = gf->listSizes[tile]; 01598 if (listSize > 0) 01599 { 01600 qStart = i; 01601 if (qMaskBits == NULL || bitCountRange(qMaskBits, qStart+qMaskOffset, tileSize) == 0) 01602 { 01603 tList = gf->lists[tile]; 01604 for (j=0; j<listSize; ++j) 01605 { 01606 int tStart = tList[j]; 01607 if (target == NULL || 01608 (target == findSource(gf, tStart) 01609 && tStart >= tMin && tStart < tMax) ) 01610 { 01611 lmAllocVar(lm,hit); 01612 hit->qStart = qStart; 01613 hit->tStart = tStart; 01614 hit->diagonal = tStart + size - qStart; 01615 slAddHead(&hitList, hit); 01616 ++hitCount; 01617 } 01618 } 01619 } 01620 } 01621 } 01622 tile += posMul; 01623 } 01624 } 01625 posMul *= alphabetSize; 01626 } 01627 } 01628 *retHitCount = hitCount; 01629 return hitList; 01630 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfTransFindClumps | ( | struct genoFind * | gfs[3], | |
| aaSeq * | seq, | |||
| struct gfClump * | clumps[3], | |||
| struct lm * | lm, | |||
| int * | retHitCount | |||
| ) |
Definition at line 1897 of file genoFind.c.
References gfFindClumps(), gfClump::hitCount, and lm.
Referenced by gfFindAlignAaTrans(), gfTransTransFindClumps(), and transQuery().
01899 { 01900 int frame; 01901 int oneHit; 01902 int hitCount = 0; 01903 for (frame = 0; frame < 3; ++frame) 01904 { 01905 clumps[frame] = gfFindClumps(gfs[frame], seq, lm, &oneHit); 01906 hitCount += oneHit; 01907 } 01908 *retHitCount = hitCount; 01909 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfTransTransFindClumps | ( | struct genoFind * | gfs[3], | |
| aaSeq * | seqs[3], | |||
| struct gfClump * | clumps[3][3], | |||
| struct lm * | lm, | |||
| int * | retHitCount | |||
| ) |
Definition at line 1911 of file genoFind.c.
References gfTransFindClumps(), gfClump::hitCount, and lm.
Referenced by gfTransTransFindBundles(), and transTransQuery().
01915 { 01916 int qFrame; 01917 int oneHit; 01918 int hitCount = 0; 01919 01920 for (qFrame = 0; qFrame<3; ++qFrame) 01921 { 01922 gfTransFindClumps(gfs, seqs[qFrame], clumps[qFrame], lm, &oneHit); 01923 hitCount += oneHit; 01924 } 01925 *retHitCount = hitCount; 01926 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void gfZeroNonOverused | ( | struct genoFind * | gf | ) | [static] |
Definition at line 496 of file genoFind.c.
References bits32, genoFind::listSizes, genoFind::maxPat, and genoFind::tileSpaceSize.
Referenced by gfIndexNibsAndTwoBits(), gfIndexTransNibsAndTwoBits(), gfLargeIndexSeq(), and gfSmallIndexSeq().
00498 { 00499 bits32 *sizes = gf->listSizes; 00500 int tileSpaceSize = gf->tileSpaceSize, i; 00501 int maxPat = gf->maxPat; 00502 int overCount = 0; 00503 00504 for (i=0; i<tileSpaceSize; ++i) 00505 { 00506 if (sizes[i] < maxPat) 00507 { 00508 sizes[i] = 0; 00509 ++overCount; 00510 } 00511 } 00512 }
Here is the caller graph for this function:

| static void gfZeroOverused | ( | struct genoFind * | gf | ) | [static] |
Definition at line 478 of file genoFind.c.
References bits32, genoFind::listSizes, genoFind::maxPat, and genoFind::tileSpaceSize.
Referenced by gfIndexNibsAndTwoBits(), gfIndexTransNibsAndTwoBits(), gfLargeIndexSeq(), and gfSmallIndexSeq().
00480 { 00481 bits32 *sizes = gf->listSizes; 00482 int tileSpaceSize = gf->tileSpaceSize, i; 00483 int maxPat = gf->maxPat; 00484 int overCount = 0; 00485 00486 for (i=0; i<tileSpaceSize; ++i) 00487 { 00488 if (sizes[i] >= maxPat) 00489 { 00490 sizes[i] = 0; 00491 ++overCount; 00492 } 00493 } 00494 }
Here is the caller graph for this function:

| static void initNtLookup | ( | ) | [static] |
Definition at line 175 of file genoFind.c.
References A_BASE_VAL, ArraySize, C_BASE_VAL, FALSE, G_BASE_VAL, ntLookup, T_BASE_VAL, and TRUE.
Referenced by gfAddLargeSeq(), gfAddSeq(), gfCountSeq(), gfSegmentedFindHits(), gfSegmentedFindNearHits(), gfStraightFindHits(), and gfStraightFindNearHits().
00176 { 00177 static boolean initted = FALSE; 00178 if (!initted) 00179 { 00180 int i; 00181 for (i=0; i<ArraySize(ntLookup); ++i) 00182 ntLookup[i] = -1; 00183 ntLookup['a'] = A_BASE_VAL; 00184 ntLookup['c'] = C_BASE_VAL; 00185 ntLookup['t'] = T_BASE_VAL; 00186 ntLookup['g'] = G_BASE_VAL; 00187 initted = TRUE; 00188 } 00189 }
Here is the caller graph for this function:

| static void maskSimplePepRepeat | ( | struct genoFind * | gf | ) | [static] |
Definition at line 604 of file genoFind.c.
References bits32, genoFind::listSizes, genoFind::maxPat, genoFind::tileSize, and tileSize.
Referenced by gfIndexTransNibsAndTwoBits(), and gfSmallIndexSeq().
00607 { 00608 int i; 00609 int tileSize = gf->tileSize; 00610 int maxPat = gf->maxPat; 00611 bits32 *listSizes = gf->listSizes; 00612 00613 for (i=0; i<20; ++i) 00614 { 00615 int j, k; 00616 for (j=0; j<20; ++j) 00617 { 00618 int tile = 0; 00619 for (k=0; k<tileSize; ++k) 00620 { 00621 tile *= 20; 00622 if (k&1) 00623 tile += j; 00624 else 00625 tile += i; 00626 } 00627 listSizes[tile] = maxPat; 00628 } 00629 } 00630 }
Here is the caller graph for this function:

| long long maxTotalBases | ( | ) |
Definition at line 274 of file genoFind.c.
Referenced by gfIndexNibsAndTwoBits(), gfIndexTransNibsAndTwoBits(), and twoBitCheckTotalSize().
00276 { 00277 long long maxBases = 1024*1024; 00278 maxBases *= 4*1024; 00279 return maxBases; 00280 }
Here is the caller graph for this function:

| static void mergeAdd | ( | struct binKeeper * | bk, | |
| int | start, | |||
| int | end, | |||
| struct gfSeqSource * | src | |||
| ) | [static] |
Definition at line 2038 of file genoFind.c.
References binKeeperAdd(), binKeeperFind(), binKeeperRemove(), binElement::end, binElement::next, slFreeList(), and binElement::start.
Referenced by pcrClumps().
02041 { 02042 struct binElement *iEl, *iList = binKeeperFind(bk, start, end); 02043 for (iEl = iList; iEl != NULL; iEl = iEl->next) 02044 { 02045 if (iEl->start < start) 02046 start = iEl->start; 02047 if (iEl->end > end) 02048 end = iEl->end; 02049 binKeeperRemove(bk, iEl->start, iEl->end, src); 02050 } 02051 slFreeList(&iList); 02052 binKeeperAdd(bk, start, end, src); 02053 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct gfClump* pcrClumps | ( | struct genoFind * | gf, | |
| char * | fPrimer, | |||
| int | fPrimerSize, | |||
| char * | rPrimer, | |||
| int | rPrimerSize, | |||
| int | minDistance, | |||
| int | maxDistance | |||
| ) | [static, read] |
Definition at line 2055 of file genoFind.c.
References AllocArray, AllocVar, binKeeperFindAll(), binKeeperFree(), binKeeperNew(), bits32, errAbort(), findSource(), freez(), gfDnaTile(), hashAdd(), hashElFreeList(), hashElListHash(), hashFindVal(), hashFree, genoFind::lists, genoFind::listSizes, mergeAdd(), newHash(), binElement::next, hashEl::next, slAddHead, slFreeList(), gfSeqSource::start, genoFind::tileSize, tileSize, and hashEl::val.
Referenced by gfPcrClumps().
02058 { 02059 struct gfClump *clumpList = NULL; 02060 int tileSize = gf->tileSize; 02061 int fTile; 02062 int fTileCount = fPrimerSize - tileSize; 02063 int *rTiles, rTile; 02064 int rTileCount = rPrimerSize - tileSize; 02065 int fTileIx,rTileIx,fPosIx,rPosIx; 02066 bits32 *fPosList, fPos, *rPosList, rPos; 02067 int fPosListSize, rPosListSize; 02068 struct hash *targetHash = newHash(0); 02069 02070 /* Build up array of all tiles in reverse primer. */ 02071 AllocArray(rTiles, rTileCount); 02072 for (rTileIx = 0; rTileIx<rTileCount; ++rTileIx) 02073 { 02074 rTiles[rTileIx] = gfDnaTile(rPrimer + rTileIx, tileSize); 02075 if (rTiles[rTileIx] == -1) 02076 errAbort("Bad char in reverse primer sequence: %s", rPrimer); 02077 } 02078 02079 /* Loop through all tiles in forward primer. */ 02080 for (fTileIx=0; fTileIx<fTileCount; ++fTileIx) 02081 { 02082 fTile = gfDnaTile(fPrimer + fTileIx, tileSize); 02083 if (fTile >= 0) 02084 { 02085 fPosListSize = gf->listSizes[fTile]; 02086 fPosList = gf->lists[fTile]; 02087 for (fPosIx=0; fPosIx < fPosListSize; ++fPosIx) 02088 { 02089 fPos = fPosList[fPosIx]; 02090 /* Loop through hits to reverse primer. */ 02091 for (rTileIx=0; rTileIx < rTileCount; ++rTileIx) 02092 { 02093 rTile = rTiles[rTileIx]; 02094 rPosListSize = gf->listSizes[rTile]; 02095 rPosList = gf->lists[rTile]; 02096 for (rPosIx=0; rPosIx < rPosListSize; ++rPosIx) 02097 { 02098 rPos = rPosList[rPosIx]; 02099 if (rPos > fPos) 02100 { 02101 int distance = rPos - fPos; 02102 if (distance >= minDistance && distance <= maxDistance) 02103 { 02104 struct gfSeqSource *target = findSource(gf, fPos); 02105 if (rPos < target->end) 02106 { 02107 struct binKeeper *bk; 02108 int tStart = target->start; 02109 char *tName = target->fileName; 02110 if (tName == NULL) 02111 tName = target->seq->name; 02112 if ((bk = hashFindVal(targetHash, tName)) == NULL) 02113 { 02114 bk = binKeeperNew(0, target->end - tStart); 02115 hashAdd(targetHash, tName, bk); 02116 } 02117 mergeAdd(bk, fPos - tStart, rPos - tStart, target); 02118 } 02119 } 02120 } 02121 } 02122 } 02123 } 02124 } 02125 } 02126 02127 /* Make clumps and clean up temporary structures. */ 02128 { 02129 struct hashEl *tList, *tEl; 02130 tList = hashElListHash(targetHash); 02131 for (tEl = tList; tEl != NULL; tEl = tEl->next) 02132 { 02133 struct binKeeper *bk = tEl->val; 02134 struct binElement *bkList, *bkEl; 02135 bkList = binKeeperFindAll(bk); 02136 for (bkEl = bkList; bkEl != NULL; bkEl = bkEl->next) 02137 { 02138 struct gfClump *clump; 02139 AllocVar(clump); 02140 clump->target = bkEl->val; 02141 clump->tStart = bkEl->start; 02142 clump->tEnd = bkEl->end; 02143 slAddHead(&clumpList, clump); 02144 } 02145 slFreeList(&bkList); 02146 binKeeperFree(&bk); 02147 } 02148 hashElFreeList(&tList); 02149 hashFree(&targetHash); 02150 } 02151 freez(&rTiles); 02152 return clumpList; 02153 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct dnaSeq* readMaskedNib | ( | char * | fileName, | |
| boolean | mask | |||
| ) | [read] |
Definition at line 632 of file genoFind.c.
References dnaSeq::dna, NIB_MASK_MIXED, nibLoadAll(), nibLoadAllMasked(), dnaSeq::size, toggleCase(), and upperToN().
Referenced by gfIndexTransNibsAndTwoBits().
00635 { 00636 struct dnaSeq *seq; 00637 if (mask) 00638 { 00639 seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName); 00640 toggleCase(seq->dna, seq->size); 00641 upperToN(seq->dna, seq->size); 00642 } 00643 else 00644 seq = nibLoadAll(fileName); 00645 return seq; 00646 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct dnaSeq* readMaskedTwoBit | ( | struct twoBitFile * | tbf, | |
| char * | seqName, | |||
| boolean | mask | |||
| ) | [read] |
Definition at line 648 of file genoFind.c.
References dnaSeq::dna, dnaSeq::size, toggleCase(), twoBitReadSeqFrag(), twoBitReadSeqFragLower(), and upperToN().
Referenced by gfIndexTransNibsAndTwoBits().
00652 { 00653 struct dnaSeq *seq; 00654 if (mask) 00655 { 00656 seq = twoBitReadSeqFrag(tbf, seqName, 0, 0); 00657 toggleCase(seq->dna, seq->size); 00658 upperToN(seq->dna, seq->size); 00659 } 00660 else 00661 seq = twoBitReadSeqFragLower(tbf, seqName, 0, 0); 00662 return seq; 00663 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void targetClump | ( | struct genoFind * | gf, | |
| struct gfClump ** | pClumpList, | |||
| struct gfClump * | clump | |||
| ) | [static] |
Definition at line 1212 of file genoFind.c.
References AllocVar, findClumpBounds(), findSource(), gfClumpFree(), gfClump::hitList, genoFind::minMatch, newClump(), gfHit::next, slAddHead, slReverse(), ss, gfClump::target, gfClump::tEnd, genoFind::tileSize, gfHit::tStart, and gfClump::tStart.
Referenced by clumpNear().
01215 { 01216 struct gfSeqSource *ss = findSource(gf, clump->tStart); 01217 if (ss->end >= clump->tEnd) 01218 { 01219 /* Usual simple case: all of clump hits single target. */ 01220 clump->target = ss; 01221 slAddHead(pClumpList, clump); 01222 } 01223 else 01224 { 01225 /* If we've gotten here, then the clump is split across multiple targets. 01226 * We'll have to split it into multiple clumps... */ 01227 struct gfHit *hit, *nextHit, *inList, *outList, *oldList = clump->hitList; 01228 int hCount; 01229 01230 while (oldList != NULL) 01231 { 01232 inList = outList = NULL; 01233 hCount = 0; 01234 for (hit = oldList; hit != NULL; hit = nextHit) 01235 { 01236 nextHit = hit->next; 01237 if (ss->start <= hit->tStart && hit->tStart < ss->end) 01238 { 01239 ++hCount; 01240 slAddHead(&inList, hit); 01241 } 01242 else 01243 { 01244 slAddHead(&outList, hit); 01245 } 01246 } 01247 slReverse(&inList); 01248 slReverse(&outList); 01249 if (hCount >= gf->minMatch) 01250 { 01251 struct gfClump *newClump; 01252 AllocVar(newClump); 01253 newClump->hitList = inList; 01254 newClump->hitCount = hCount; 01255 newClump->target = ss; 01256 findClumpBounds(newClump, gf->tileSize); 01257 slAddHead(pClumpList, newClump); 01258 } 01259 else 01260 { 01261 inList = NULL; 01262 } 01263 oldList = outList; 01264 if (oldList != NULL) 01265 { 01266 ss = findSource(gf, oldList->tStart); 01267 } 01268 } 01269 clump->hitList = NULL; /* We ate up the hit list. */ 01270 gfClumpFree(&clump); 01271 } 01272 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 666 of file genoFind.c.
References dnaSeq::dna, gfCountSeq(), reverseComplement(), trans3::seq, dnaSeq::size, trans3::trans, trans3Free(), and trans3New().
Referenced by gfIndexTransNibsAndTwoBits().
00670 { 00671 int isRc, frame; 00672 struct trans3 *t3; 00673 for (isRc=0; isRc <= 1; ++isRc) 00674 { 00675 if (isRc) 00676 reverseComplement(seq->dna, seq->size); 00677 t3 = trans3New(seq); 00678 for (frame = 0; frame < 3; ++frame) 00679 { 00680 struct genoFind *gf = transGf[isRc][frame]; 00681 gfCountSeq(gf, t3->trans[frame]); 00682 } 00683 trans3Free(&t3); 00684 } 00685 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void transIndexBothStrands | ( | struct dnaSeq * | seq, | |
| struct genoFind * | transGf[2][3], | |||
| bits32 | offset[2][3], | |||
| int | sourceIx, | |||
| char * | fileName | |||
| ) | [static] |
Definition at line 687 of file genoFind.c.
References cloneString(), dnaSeq::dna, gfAddSeq(), trans3::isRc, reverseComplement(), gfSeqSource::seq, dnaSeq::size, ss, trans3::trans, trans3Free(), and trans3New().
Referenced by gfIndexTransNibsAndTwoBits().
00692 { 00693 int isRc, frame; 00694 struct trans3 *t3; 00695 struct gfSeqSource *ss; 00696 for (isRc=0; isRc <= 1; ++isRc) 00697 { 00698 if (isRc) 00699 { 00700 reverseComplement(seq->dna, seq->size); 00701 } 00702 t3 = trans3New(seq); 00703 for (frame = 0; frame < 3; ++frame) 00704 { 00705 struct genoFind *gf = transGf[isRc][frame]; 00706 ss = gf->sources + sourceIx; 00707 gfAddSeq(gf, t3->trans[frame], offset[isRc][frame]); 00708 ss->fileName = cloneString(fileName); 00709 ss->start = offset[isRc][frame]; 00710 offset[isRc][frame] += t3->trans[frame]->size; 00711 ss->end = offset[isRc][frame]; 00712 } 00713 trans3Free(&t3); 00714 } 00715 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static long long twoBitCheckTotalSize | ( | struct twoBitFile * | tbf | ) | [static] |
Definition at line 282 of file genoFind.c.
References errAbort(), twoBitFile::fileName, maxTotalBases(), and twoBitTotalSize().
Referenced by gfCountTilesInTwoBit(), and gfIndexTransNibsAndTwoBits().
00285 { 00286 long long totalSize = twoBitTotalSize(tbf); 00287 if (totalSize > maxTotalBases()) 00288 errAbort("Sorry, can only index up to %lld bases, %s has %lld", 00289 maxTotalBases(), tbf->fileName, totalSize); 00290 return totalSize; 00291 }
Here is the call graph for this function:

Here is the caller graph for this function:

int cmpQuerySize [static] |
int gfNearEnough = 300 [static] |
int ntLookup[256] [static] |
| volatile boolean pipeBroke = FALSE |
char const rcsid[] = "$Id: genoFind.c,v 1.23 2007/02/04 05:09:02 kent Exp $" [static] |
Definition at line 20 of file genoFind.c.
1.5.2