jkOwnLib/patSpace.c File Reference

#include "common.h"
#include "portable.h"
#include "dnaseq.h"
#include "ooc.h"
#include "patSpace.h"

Include dependency graph for patSpace.c:

Go to the source code of this file.

Data Structures

struct  blockPos
struct  patSpace

Defines

#define blockSize   (256)
#define BIGONE
#define maxBlockCount   (2*230*1024 - 1)
#define psBits   bits32
#define maxPatCount   (1024*16)

Functions

void freePatSpace (struct patSpace **pPatSpace)
void tooManyBlocks ()
static void fixBlockSize (struct blockPos *bp, int blockIx)
static struct patSpacenewPatSpace (int minMatch, int maxGap, int seedSize)
static void countPatSpace (struct patSpace *ps, struct dnaSeq *seq)
static int allocPatSpaceLists (struct patSpace *ps)
static int addToPatSpace (struct patSpace *ps, int bacIx, int seqIx, struct dnaSeq *seq, int startBlock)
patSpacemakePatSpace (struct dnaSeq **seqArray, int seqArrayCount, int seedSize, char *oocFileName, int minMatch, int maxGap)
void bfFind (struct patSpace *ps, int block, struct blockPos *ret)
static struct patClumpnewClump (struct patSpace *ps, struct blockPos *first, struct blockPos *last)
static struct patClumpclumpHits (struct patSpace *ps, int hitBlocks[], int hitCount, int posBuf[], DNA *dna, int dnaSize)
patClumppatSpaceFindOne (struct patSpace *ps, DNA *dna, int dnaSize)

Variables

static char const rcsid [] = "$Id: patSpace.c,v 1.3 2006/03/15 18:36:16 angie Exp $"


Define Documentation

#define BIGONE

Definition at line 14 of file patSpace.c.

#define blockSize   (256)

Definition at line 12 of file patSpace.c.

Referenced by addToPatSpace(), axtFromBlocks(), fixBlockSize(), newClump(), pslHasIntron(), pslWeightedIntronOrientation(), and savePslx().

#define maxBlockCount   (2*230*1024 - 1)

Definition at line 18 of file patSpace.c.

Referenced by addToPatSpace(), bfFind(), makePatSpace(), and tooManyBlocks().

#define maxPatCount   (1024*16)

Definition at line 35 of file patSpace.c.

Referenced by countPatSpace(), and makePatSpace().

#define psBits   bits32

Definition at line 19 of file patSpace.c.

Referenced by addToPatSpace(), allocPatSpaceLists(), makePatSpace(), and patSpaceFindOne().


Function Documentation

static int addToPatSpace ( struct patSpace ps,
int  bacIx,
int  seqIx,
struct dnaSeq seq,
int  startBlock 
) [static]

Definition at line 181 of file patSpace.c.

References blockPos::bacIx, patSpace::blockPos, blockSize, dnaSeq::dna, fixBlockSize(), patSpace::lists, patSpace::listSizes, mask, maxBlockCount, patSpace::maxPat, ntValNoN, blockPos::offset, psBits, patSpace::seedSize, patSpace::seedSpaceSize, blockPos::seq, blockPos::seqIx, dnaSeq::size, and tooManyBlocks().

Referenced by makePatSpace().

00183 {
00184 int size = seq->size;
00185 int mask = ps->seedSpaceSize-1;
00186 DNA *dna = seq->dna;
00187 int i;
00188 int bits = 0;
00189 int bVal;
00190 int blockMod = blockSize;
00191 int curCount;
00192 psBits maxPat = ps->maxPat;
00193 psBits *listSizes = ps->listSizes;
00194 psBits **lists = ps->lists;
00195 struct blockPos *bp = &ps->blockPos[startBlock];
00196 
00197 bp->bacIx = bacIx;
00198 bp->seqIx = seqIx;
00199 bp->seq = seq;
00200 bp->offset = 0;
00201 fixBlockSize(bp, startBlock);
00202 ++bp;
00203 for (i=0; i<ps->seedSize-1; ++i)
00204     {
00205     bVal = ntValNoN[(int)dna[i]];
00206     bits <<= 2;
00207     bits += bVal;
00208     }
00209 for (i=ps->seedSize-1; i<size; ++i)
00210     {
00211     bVal = ntValNoN[(int)dna[i]];
00212     bits <<= 2;
00213     bits += bVal;
00214     bits &= mask;
00215     if ((curCount = listSizes[bits]) < maxPat)
00216         {
00217         listSizes[bits] = curCount+1;
00218         lists[bits][curCount] = startBlock;
00219         }
00220     if (--blockMod == 0)
00221         {
00222         if (++startBlock >= maxBlockCount)
00223             tooManyBlocks();
00224         blockMod = blockSize;
00225         bp->bacIx = bacIx;
00226         bp->seqIx = seqIx;
00227         bp->seq = seq;
00228         bp->offset = i - (ps->seedSize-1) + 1;
00229         fixBlockSize(bp, startBlock);
00230         ++bp;
00231         }
00232     }
00233 for (i=0; i<ps->seedSize-1; ++i)
00234     {
00235     if (--blockMod == 0)
00236         {
00237         if (++startBlock >= maxBlockCount)
00238             tooManyBlocks();
00239         blockMod = blockSize;
00240         }
00241     }
00242 if (blockMod != blockSize)
00243     ++startBlock;
00244 return startBlock;
00245 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int allocPatSpaceLists ( struct patSpace ps  )  [static]

Definition at line 141 of file patSpace.c.

References patSpace::allocated, patSpace::lists, patSpace::listSizes, patSpace::maxPat, needLargeMem(), psBits, and patSpace::seedSpaceSize.

Referenced by makePatSpace().

00144 {
00145 int oneCount;
00146 int count = 0;
00147 int i;
00148 psBits *listSizes = ps->listSizes;
00149 psBits **lists = ps->lists;
00150 psBits *allocated;
00151 psBits maxPat = ps->maxPat;
00152 int size;
00153 int usedCount = 0, overusedCount = 0;
00154 int seedSpaceSize = ps->seedSpaceSize;
00155 
00156 for (i=0; i<seedSpaceSize; ++i)
00157     {
00158     /* If pattern is too much used it's no good to us, ignore. */
00159     if ((oneCount = listSizes[i]) < maxPat)
00160         {
00161         count += oneCount;
00162         usedCount += 1;
00163         }
00164     else
00165         {
00166         overusedCount += 1;
00167         }
00168     }
00169 ps->allocated = allocated = needLargeMem(count*sizeof(allocated[0]));
00170 for (i=0; i<seedSpaceSize; ++i)
00171     {
00172     if ((size = listSizes[i]) < maxPat)
00173         {
00174         lists[i] = allocated;
00175         allocated += size;
00176         }
00177     }
00178 return count;
00179 }

Here is the call graph for this function:

Here is the caller graph for this function:

void bfFind ( struct patSpace ps,
int  block,
struct blockPos ret 
)

Definition at line 319 of file patSpace.c.

References patSpace::blockPos, and maxBlockCount.

Referenced by clumpHits().

00321 {
00322 assert(block >= 0 && block < maxBlockCount);
00323 *ret = ps->blockPos[block];
00324 }

Here is the caller graph for this function:

static struct patClump* clumpHits ( struct patSpace ps,
int  hitBlocks[],
int  hitCount,
int  posBuf[],
DNA dna,
int  dnaSize 
) [static, read]

Definition at line 354 of file patSpace.c.

References bfFind(), patSpace::maxGap, maxGap, newClump(), blockPos::offset, blockPos::seq, blockPos::size, slAddHead, and slReverse().

00358 {
00359 /* Clump together hits. */
00360 int block;
00361 int i;
00362 int maxGap = ps->maxGap;
00363 struct blockPos first, cur, pre;
00364 struct patClump *patClump = NULL, *cl;
00365 
00366 bfFind(ps, hitBlocks[0], &first);
00367 pre = first;
00368 for (i=1; i<hitCount; ++i)
00369     {
00370     block = hitBlocks[i];
00371     bfFind(ps, block, &cur);
00372     if (cur.seq != pre.seq || cur.offset - (pre.offset + pre.size) > maxGap)
00373         {
00374         /* Write old clump and start new one. */
00375         cl = newClump(ps, &first, &pre);
00376         slAddHead(&patClump, cl);
00377         first = cur;
00378         }
00379     else
00380         {
00381         /* Extend old clump. */
00382         }
00383     pre = cur;
00384     }
00385 /* Write hitOut last clump. */
00386 cl = newClump(ps, &first, &pre);
00387 slAddHead(&patClump, cl);
00388 slReverse(&patClump);
00389 return patClump;
00390 }

Here is the call graph for this function:

static void countPatSpace ( struct patSpace ps,
struct dnaSeq seq 
) [static]

Definition at line 112 of file patSpace.c.

References dnaSeq::dna, patSpace::listSizes, mask, maxPatCount, ntValNoN, patSpace::seedSize, patSpace::seedSpaceSize, and dnaSeq::size.

Referenced by makePatSpace().

00114 {
00115 int size = seq->size;
00116 int mask = ps->seedSpaceSize-1;
00117 DNA *dna = seq->dna;
00118 int i;
00119 int bits = 0;
00120 int bVal;
00121 int ls;
00122 
00123 for (i=0; i<ps->seedSize-1; ++i)
00124     {
00125     bVal = ntValNoN[(int)dna[i]];
00126     bits <<= 2;
00127     bits += bVal;
00128     }
00129 for (i=ps->seedSize-1; i<size; ++i)
00130     {
00131     bVal = ntValNoN[(int)dna[i]];
00132     bits <<= 2;
00133     bits += bVal;
00134     bits &= mask;
00135     ls = ps->listSizes[bits];
00136     if (ls < maxPatCount)
00137         ps->listSizes[bits] = ls+1;
00138     }
00139 }

Here is the caller graph for this function:

static void fixBlockSize ( struct blockPos bp,
int  blockIx 
) [static]

Definition at line 85 of file patSpace.c.

References blockSize, blockPos::offset, blockPos::seq, blockPos::size, and dnaSeq::size.

Referenced by addToPatSpace().

00087 {
00088 struct dnaSeq *seq = bp->seq;
00089 int size = seq->size - bp->offset;
00090 if (size > blockSize)
00091         size = blockSize;
00092 bp->size = size;
00093 }

Here is the caller graph for this function:

void freePatSpace ( struct patSpace **  pPatSpace  ) 

Definition at line 68 of file patSpace.c.

References patSpace::allocated, freeMem(), and freez().

00070 {
00071 struct patSpace *ps = *pPatSpace;
00072 if (ps != NULL)
00073     {
00074     freeMem(ps->allocated);
00075     freez(pPatSpace);
00076     }
00077 }

Here is the call graph for this function:

struct patSpace* makePatSpace ( struct dnaSeq **  seqArray,
int  seqArrayCount,
int  seedSize,
char *  oocFileName,
int  minMatch,
int  maxGap 
) [read]

Definition at line 247 of file patSpace.c.

References addToPatSpace(), allocPatSpaceLists(), patSpace::blocksUsed, countPatSpace(), patSpace::listSizes, maxBlockCount, patSpace::maxPat, maxPatCount, newPatSpace(), dnaSeq::next, oocMaskCounts(), oocMaskSimpleRepeats(), psBits, patSpace::seedSize, patSpace::seedSpaceSize, dnaSeq::size, and tooManyBlocks().

00258 {
00259 struct patSpace *ps = newPatSpace(minMatch, maxGap,seedSize);
00260 int i;
00261 int startIx = 0;
00262 int total = 0;
00263 struct dnaSeq *seq;
00264 psBits maxPat;
00265 psBits *listSizes;
00266 int seedSpaceSize = ps->seedSpaceSize;
00267 
00268 maxPat = ps->maxPat = maxPatCount;
00269 for (i=0; i<seqArrayCount; ++i)
00270     {
00271     for (seq = seqArray[i]; seq != NULL; seq = seq->next)
00272         {
00273         total += seq->size;
00274         countPatSpace(ps, seq);
00275         }
00276     }
00277 
00278 listSizes = ps->listSizes;
00279 
00280 /* Scan through over-popular patterns and set their count to value 
00281  * where they won't be added to pat space. */
00282 oocMaskCounts(oocFileName, listSizes, ps->seedSize, maxPat);
00283 
00284 /* Get rid of simple repeats as well. */
00285 oocMaskSimpleRepeats(listSizes, ps->seedSize, maxPat);
00286 
00287 
00288 allocPatSpaceLists(ps);
00289 
00290 /* Zero out pattern counts that aren't oversubscribed. */
00291 for (i=0; i<ps->seedSpaceSize; ++i)
00292     {
00293     if (listSizes[i] < maxPat)
00294         listSizes[i] = 0;
00295     }
00296 for (i=0; i<seqArrayCount; ++i)
00297     {
00298         int j;
00299     for (seq = seqArray[i], j=0; seq != NULL; seq = seq->next, ++j)
00300         {
00301         startIx = addToPatSpace(ps, i, j, seq, startIx);
00302         if (startIx >= maxBlockCount)
00303             tooManyBlocks();
00304         }
00305     }
00306 ps->blocksUsed = startIx;
00307 
00308 /* Zero local over-popular patterns. */
00309 for (i=0; i<seedSpaceSize; ++i)
00310     {
00311     if (listSizes[i] >= maxPat)
00312         listSizes[i] = 0;
00313     }
00314 
00315 return ps;
00316 }

Here is the call graph for this function:

static struct patClump* newClump ( struct patSpace ps,
struct blockPos first,
struct blockPos last 
) [static, read]

Definition at line 327 of file patSpace.c.

References AllocVar, patClump::bacIx, blockPos::bacIx, blockSize, blockPos::offset, patClump::seq, blockPos::seq, patClump::seqIx, blockPos::seqIx, patClump::size, dnaSeq::size, blockPos::size, and patClump::start.

Referenced by clumpHits(), and targetClump().

00329 {
00330 struct dnaSeq *seq = first->seq;
00331 int seqIx = first->seqIx;
00332 int bacIx = first->bacIx;
00333 int start = first->offset;
00334 int end = last->offset+last->size;
00335 int extraAtEnds = blockSize/2;
00336 struct patClump *cl;
00337 
00338 start -= extraAtEnds;
00339 if (start < 0)
00340     start = 0;
00341 end += extraAtEnds;
00342 if (end >seq->size)
00343     end = seq->size;
00344 AllocVar(cl);
00345 cl->bacIx = bacIx;
00346 cl->seqIx = seqIx;
00347 cl->seq = seq;
00348 cl->start = start;
00349 cl->size = end-start;
00350 return cl;
00351 }

Here is the caller graph for this function:

static struct patSpace* newPatSpace ( int  minMatch,
int  maxGap,
int  seedSize 
) [static, read]

Definition at line 95 of file patSpace.c.

References AllocVar, patSpace::lists, patSpace::listSizes, patSpace::maxGap, patSpace::minMatch, needLargeZeroedMem(), patSpace::seedSize, and patSpace::seedSpaceSize.

Referenced by makePatSpace().

00097 {
00098 struct patSpace *ps;
00099 int seedBitSize = seedSize*2;
00100 int seedSpaceSize;
00101 
00102 AllocVar(ps);
00103 ps->seedSize = seedSize;
00104 seedSpaceSize = ps->seedSpaceSize = (1<<seedBitSize);
00105 ps->lists = needLargeZeroedMem(seedSpaceSize * sizeof(ps->lists[0]));
00106 ps->listSizes = needLargeZeroedMem(seedSpaceSize * sizeof(ps->listSizes[0]));
00107 ps->minMatch = minMatch;
00108 ps->maxGap = maxGap;
00109 return ps;
00110 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct patClump* patSpaceFindOne ( struct patSpace ps,
DNA dna,
int  dnaSize 
) [read]

Definition at line 392 of file patSpace.c.

References patSpace::blocksUsed, patSpace::hitBlocks, patSpace::lists, patSpace::listSizes, minMatch, patSpace::minMatch, ntValNoN, patSpace::posBuf, psBits, and patSpace::seedSize.

Referenced by ssFindBundles().

00394 {
00395 int lastStart = dnaSize - ps->seedSize;
00396 int i,j;
00397 int pat;
00398 int hitBlockCount = 0;
00399 int totalSigHits = 0;
00400 DNA *tile = dna;
00401 int blocksUsed = ps->blocksUsed;
00402 int *posBuf = ps->posBuf;
00403 int *hitBlocks = ps->hitBlocks;
00404 int minMatch = ps->minMatch;
00405 
00406 memset(ps->posBuf, 0, sizeof(ps->posBuf[0]) * blocksUsed);
00407 for (i=0; i<=lastStart; i += ps->seedSize)
00408     {
00409     psBits *list;
00410     psBits count;
00411 
00412     pat = 0;
00413     for (j=0; j<ps->seedSize; ++j)
00414         {
00415         int bVal = ntValNoN[(int)tile[j]];
00416         pat <<= 2;
00417         pat += bVal;
00418         }
00419     list = ps->lists[pat];
00420     if ((count = ps->listSizes[pat]) > 0)
00421         {
00422         for (j=0; j<count; ++j)
00423             posBuf[list[j]] += 1;            
00424         }
00425     tile += ps->seedSize;
00426     }
00427 
00428 /* Scan through array that records counts of hits at positions. */
00429 for (i=0; i<blocksUsed-1; ++i)
00430     {
00431     /* Save significant hits in a more compact array */
00432     int a = posBuf[i], b = posBuf[i+1];
00433     int sum = a + b;
00434     if (sum >= minMatch)
00435         {
00436         if (a > 0)
00437             {
00438             if (hitBlockCount == 0 || hitBlocks[hitBlockCount-1] != i)
00439                 {
00440                 hitBlocks[hitBlockCount++] = i;
00441                 totalSigHits += a;
00442                 }
00443             }
00444         if (b > 0)
00445             {
00446             hitBlocks[hitBlockCount++] = i+1;
00447             totalSigHits += b;
00448             }
00449         }
00450     }
00451 
00452 /* Output data with significant hits. */
00453 if (hitBlockCount > 0 && totalSigHits*ps->seedSize*8 > dnaSize)
00454     {
00455     return clumpHits(ps, hitBlocks, hitBlockCount, posBuf, dna, 
00456         dnaSize);
00457     }        
00458 else
00459     return NULL;
00460 }

Here is the caller graph for this function:

void tooManyBlocks (  ) 

Definition at line 79 of file patSpace.c.

References errAbort(), and maxBlockCount.

Referenced by addToPatSpace(), and makePatSpace().

00081 {
00082 errAbort("Too many blocks, can only handle %d\n", maxBlockCount);
00083 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

char const rcsid[] = "$Id: patSpace.c,v 1.3 2006/03/15 18:36:16 angie Exp $" [static]

Definition at line 10 of file patSpace.c.


Generated on Tue Dec 25 19:29:31 2007 for blat by  doxygen 1.5.2