inc/patSpace.h File Reference

#include "dnaseq.h"

Include dependency graph for patSpace.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  patClump

Functions

struct patSpace * makePatSpace (struct dnaSeq **seqArray, int seqArrayCount, int seedSize, char *oocFileName, int minMatch, int maxGap)
void freePatSpace (struct patSpace **pPatSpace)
struct patClump * patSpaceFindOne (struct patSpace *ps, DNA *dna, int dnaSize)


Function Documentation

void freePatSpace ( struct patSpace **  pPatSpace  ) 

Definition at line 68 of file patSpace.c.

References patSpace::allocated, freeMem(), and freez().

00070 {
00071 struct patSpace *ps = *pPatSpace;
00072 if (ps != NULL)
00073     {
00074     freeMem(ps->allocated);
00075     freez(pPatSpace);
00076     }
00077 }

Here is the call graph for this function:

struct patSpace* makePatSpace ( struct dnaSeq **  seqArray,
int  seqArrayCount,
int  seedSize,
char *  oocFileName,
int  minMatch,
int  maxGap 
) [read]

Definition at line 247 of file patSpace.c.

References addToPatSpace(), allocPatSpaceLists(), patSpace::blocksUsed, countPatSpace(), patSpace::listSizes, maxBlockCount, patSpace::maxPat, maxPatCount, newPatSpace(), dnaSeq::next, oocMaskCounts(), oocMaskSimpleRepeats(), psBits, patSpace::seedSize, patSpace::seedSpaceSize, dnaSeq::size, and tooManyBlocks().

00258 {
00259 struct patSpace *ps = newPatSpace(minMatch, maxGap,seedSize);
00260 int i;
00261 int startIx = 0;
00262 int total = 0;
00263 struct dnaSeq *seq;
00264 psBits maxPat;
00265 psBits *listSizes;
00266 int seedSpaceSize = ps->seedSpaceSize;
00267 
00268 maxPat = ps->maxPat = maxPatCount;
00269 for (i=0; i<seqArrayCount; ++i)
00270     {
00271     for (seq = seqArray[i]; seq != NULL; seq = seq->next)
00272         {
00273         total += seq->size;
00274         countPatSpace(ps, seq);
00275         }
00276     }
00277 
00278 listSizes = ps->listSizes;
00279 
00280 /* Scan through over-popular patterns and set their count to value 
00281  * where they won't be added to pat space. */
00282 oocMaskCounts(oocFileName, listSizes, ps->seedSize, maxPat);
00283 
00284 /* Get rid of simple repeats as well. */
00285 oocMaskSimpleRepeats(listSizes, ps->seedSize, maxPat);
00286 
00287 
00288 allocPatSpaceLists(ps);
00289 
00290 /* Zero out pattern counts that aren't oversubscribed. */
00291 for (i=0; i<ps->seedSpaceSize; ++i)
00292     {
00293     if (listSizes[i] < maxPat)
00294         listSizes[i] = 0;
00295     }
00296 for (i=0; i<seqArrayCount; ++i)
00297     {
00298         int j;
00299     for (seq = seqArray[i], j=0; seq != NULL; seq = seq->next, ++j)
00300         {
00301         startIx = addToPatSpace(ps, i, j, seq, startIx);
00302         if (startIx >= maxBlockCount)
00303             tooManyBlocks();
00304         }
00305     }
00306 ps->blocksUsed = startIx;
00307 
00308 /* Zero local over-popular patterns. */
00309 for (i=0; i<seedSpaceSize; ++i)
00310     {
00311     if (listSizes[i] >= maxPat)
00312         listSizes[i] = 0;
00313     }
00314 
00315 return ps;
00316 }

Here is the call graph for this function:

struct patClump* patSpaceFindOne ( struct patSpace *  ps,
DNA *  dna,
int  dnaSize 
) [read]

Definition at line 392 of file patSpace.c.

References patSpace::blocksUsed, patSpace::hitBlocks, patSpace::lists, patSpace::listSizes, patSpace::minMatch, minMatch, ntValNoN, patSpace::posBuf, psBits, and patSpace::seedSize.

Referenced by ssFindBundles().

00394 {
00395 int lastStart = dnaSize - ps->seedSize;
00396 int i,j;
00397 int pat;
00398 int hitBlockCount = 0;
00399 int totalSigHits = 0;
00400 DNA *tile = dna;
00401 int blocksUsed = ps->blocksUsed;
00402 int *posBuf = ps->posBuf;
00403 int *hitBlocks = ps->hitBlocks;
00404 int minMatch = ps->minMatch;
00405 
00406 memset(ps->posBuf, 0, sizeof(ps->posBuf[0]) * blocksUsed);
00407 for (i=0; i<=lastStart; i += ps->seedSize)
00408     {
00409     psBits *list;
00410     psBits count;
00411 
00412     pat = 0;
00413     for (j=0; j<ps->seedSize; ++j)
00414         {
00415         int bVal = ntValNoN[(int)tile[j]];
00416         pat <<= 2;
00417         pat += bVal;
00418         }
00419     list = ps->lists[pat];
00420     if ((count = ps->listSizes[pat]) > 0)
00421         {
00422         for (j=0; j<count; ++j)
00423             posBuf[list[j]] += 1;            
00424         }
00425     tile += ps->seedSize;
00426     }
00427 
00428 /* Scan through array that records counts of hits at positions. */
00429 for (i=0; i<blocksUsed-1; ++i)
00430     {
00431     /* Save significant hits in a more compact array */
00432     int a = posBuf[i], b = posBuf[i+1];
00433     int sum = a + b;
00434     if (sum >= minMatch)
00435         {
00436         if (a > 0)
00437             {
00438             if (hitBlockCount == 0 || hitBlocks[hitBlockCount-1] != i)
00439                 {
00440                 hitBlocks[hitBlockCount++] = i;
00441                 totalSigHits += a;
00442                 }
00443             }
00444         if (b > 0)
00445             {
00446             hitBlocks[hitBlockCount++] = i+1;
00447             totalSigHits += b;
00448             }
00449         }
00450     }
00451 
00452 /* Output data with significant hits. */
00453 if (hitBlockCount > 0 && totalSigHits*ps->seedSize*8 > dnaSize)
00454     {
00455     return clumpHits(ps, hitBlocks, hitBlockCount, posBuf, dna, 
00456         dnaSize);
00457     }        
00458 else
00459     return NULL;
00460 }

Here is the caller graph for this function:


Generated on Tue Dec 25 19:09:41 2007 for blat by  doxygen 1.5.2