#include "common.h"
#include "portable.h"
#include "dnaseq.h"
#include "ooc.h"
#include "patSpace.h"

Include dependency graph for patSpace.c:

Go to the source code of this file.
Data Structures | |
| struct | blockPos |
| struct | patSpace |
Defines | |
| #define | blockSize (256) |
| #define | BIGONE |
| #define | maxBlockCount (2*230*1024 - 1) |
| #define | psBits bits32 |
| #define | maxPatCount (1024*16) |
Functions | |
| void | freePatSpace (struct patSpace **pPatSpace) |
| void | tooManyBlocks () |
| static void | fixBlockSize (struct blockPos *bp, int blockIx) |
| static struct patSpace * | newPatSpace (int minMatch, int maxGap, int seedSize) |
| static void | countPatSpace (struct patSpace *ps, struct dnaSeq *seq) |
| static int | allocPatSpaceLists (struct patSpace *ps) |
| static int | addToPatSpace (struct patSpace *ps, int bacIx, int seqIx, struct dnaSeq *seq, int startBlock) |
| patSpace * | makePatSpace (struct dnaSeq **seqArray, int seqArrayCount, int seedSize, char *oocFileName, int minMatch, int maxGap) |
| void | bfFind (struct patSpace *ps, int block, struct blockPos *ret) |
| static struct patClump * | newClump (struct patSpace *ps, struct blockPos *first, struct blockPos *last) |
| static struct patClump * | clumpHits (struct patSpace *ps, int hitBlocks[], int hitCount, int posBuf[], DNA *dna, int dnaSize) |
| patClump * | patSpaceFindOne (struct patSpace *ps, DNA *dna, int dnaSize) |
Variables | |
| static char const | rcsid [] = "$Id: patSpace.c,v 1.3 2006/03/15 18:36:16 angie Exp $" |
| #define BIGONE |
Definition at line 14 of file patSpace.c.
| #define blockSize (256) |
Definition at line 12 of file patSpace.c.
Referenced by addToPatSpace(), axtFromBlocks(), fixBlockSize(), newClump(), pslHasIntron(), pslWeightedIntronOrientation(), and savePslx().
| #define maxBlockCount (2*230*1024 - 1) |
Definition at line 18 of file patSpace.c.
Referenced by addToPatSpace(), bfFind(), makePatSpace(), and tooManyBlocks().
| #define maxPatCount (1024*16) |
| #define psBits bits32 |
Definition at line 19 of file patSpace.c.
Referenced by addToPatSpace(), allocPatSpaceLists(), makePatSpace(), and patSpaceFindOne().
| static int addToPatSpace | ( | struct patSpace * | ps, | |
| int | bacIx, | |||
| int | seqIx, | |||
| struct dnaSeq * | seq, | |||
| int | startBlock | |||
| ) | [static] |
Definition at line 181 of file patSpace.c.
References blockPos::bacIx, patSpace::blockPos, blockSize, dnaSeq::dna, fixBlockSize(), patSpace::lists, patSpace::listSizes, mask, maxBlockCount, patSpace::maxPat, ntValNoN, blockPos::offset, psBits, patSpace::seedSize, patSpace::seedSpaceSize, blockPos::seq, blockPos::seqIx, dnaSeq::size, and tooManyBlocks().
Referenced by makePatSpace().
00183 { 00184 int size = seq->size; 00185 int mask = ps->seedSpaceSize-1; 00186 DNA *dna = seq->dna; 00187 int i; 00188 int bits = 0; 00189 int bVal; 00190 int blockMod = blockSize; 00191 int curCount; 00192 psBits maxPat = ps->maxPat; 00193 psBits *listSizes = ps->listSizes; 00194 psBits **lists = ps->lists; 00195 struct blockPos *bp = &ps->blockPos[startBlock]; 00196 00197 bp->bacIx = bacIx; 00198 bp->seqIx = seqIx; 00199 bp->seq = seq; 00200 bp->offset = 0; 00201 fixBlockSize(bp, startBlock); 00202 ++bp; 00203 for (i=0; i<ps->seedSize-1; ++i) 00204 { 00205 bVal = ntValNoN[(int)dna[i]]; 00206 bits <<= 2; 00207 bits += bVal; 00208 } 00209 for (i=ps->seedSize-1; i<size; ++i) 00210 { 00211 bVal = ntValNoN[(int)dna[i]]; 00212 bits <<= 2; 00213 bits += bVal; 00214 bits &= mask; 00215 if ((curCount = listSizes[bits]) < maxPat) 00216 { 00217 listSizes[bits] = curCount+1; 00218 lists[bits][curCount] = startBlock; 00219 } 00220 if (--blockMod == 0) 00221 { 00222 if (++startBlock >= maxBlockCount) 00223 tooManyBlocks(); 00224 blockMod = blockSize; 00225 bp->bacIx = bacIx; 00226 bp->seqIx = seqIx; 00227 bp->seq = seq; 00228 bp->offset = i - (ps->seedSize-1) + 1; 00229 fixBlockSize(bp, startBlock); 00230 ++bp; 00231 } 00232 } 00233 for (i=0; i<ps->seedSize-1; ++i) 00234 { 00235 if (--blockMod == 0) 00236 { 00237 if (++startBlock >= maxBlockCount) 00238 tooManyBlocks(); 00239 blockMod = blockSize; 00240 } 00241 } 00242 if (blockMod != blockSize) 00243 ++startBlock; 00244 return startBlock; 00245 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int allocPatSpaceLists | ( | struct patSpace * | ps | ) | [static] |
Definition at line 141 of file patSpace.c.
References patSpace::allocated, patSpace::lists, patSpace::listSizes, patSpace::maxPat, needLargeMem(), psBits, and patSpace::seedSpaceSize.
Referenced by makePatSpace().
00144 { 00145 int oneCount; 00146 int count = 0; 00147 int i; 00148 psBits *listSizes = ps->listSizes; 00149 psBits **lists = ps->lists; 00150 psBits *allocated; 00151 psBits maxPat = ps->maxPat; 00152 int size; 00153 int usedCount = 0, overusedCount = 0; 00154 int seedSpaceSize = ps->seedSpaceSize; 00155 00156 for (i=0; i<seedSpaceSize; ++i) 00157 { 00158 /* If pattern is too much used it's no good to us, ignore. */ 00159 if ((oneCount = listSizes[i]) < maxPat) 00160 { 00161 count += oneCount; 00162 usedCount += 1; 00163 } 00164 else 00165 { 00166 overusedCount += 1; 00167 } 00168 } 00169 ps->allocated = allocated = needLargeMem(count*sizeof(allocated[0])); 00170 for (i=0; i<seedSpaceSize; ++i) 00171 { 00172 if ((size = listSizes[i]) < maxPat) 00173 { 00174 lists[i] = allocated; 00175 allocated += size; 00176 } 00177 } 00178 return count; 00179 }
Here is the call graph for this function:

Here is the caller graph for this function:

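| void bfFind | ( | struct patSpace * | ps, | |
| int | block, | |||
| struct blockPos * | ret | |||
| ) |
Definition at line 319 of file patSpace.c.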
References patSpace::blockPos, and maxBlockCount.
Referenced by clumpHits().
00321 { 00322 assert(block >= 0 && block < maxBlockCount); 00323 *ret = ps->blockPos[block]; 00324 }
Here is the caller graph for this function:

| static struct patClump* clumpHits | ( | struct patSpace * | ps, | |
| int | hitBlocks[], | |||
| int | hitCount, | |||
| int | posBuf[], | |||
| DNA * | dna, | |||
| int | dnaSize | |||
| ) | [static, read] |
Definition at line 354 of file patSpace.c.
References bfFind(), patSpace::maxGap, maxGap, newClump(), blockPos::offset, blockPos::seq, blockPos::size, slAddHead, and slReverse().
00358 { 00359 /* Clump together hits. */ 00360 int block; 00361 int i; 00362 int maxGap = ps->maxGap; 00363 struct blockPos first, cur, pre; 00364 struct patClump *patClump = NULL, *cl; 00365 00366 bfFind(ps, hitBlocks[0], &first); 00367 pre = first; 00368 for (i=1; i<hitCount; ++i) 00369 { 00370 block = hitBlocks[i]; 00371 bfFind(ps, block, &cur); 00372 if (cur.seq != pre.seq || cur.offset - (pre.offset + pre.size) > maxGap) 00373 { 00374 /* Write old clump and start new one. */ 00375 cl = newClump(ps, &first, &pre); 00376 slAddHead(&patClump, cl); 00377 first = cur; 00378 } 00379 else 00380 { 00381 /* Extend old clump. */ 00382 } 00383 pre = cur; 00384 } 00385 /* Write hitOut last clump. */ 00386 cl = newClump(ps, &first, &pre); 00387 slAddHead(&patClump, cl); 00388 slReverse(&patClump); 00389 return patClump; 00390 }
Here is the call graph for this function:

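| static void countPatSpace | ( | struct patSpace * | ps, | |
| struct dnaSeq * | seq | |||
| ) | [static] |
Definition at line 112 of file patSpace.c.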
References dnaSeq::dna, patSpace::listSizes, mask, maxPatCount, ntValNoN, patSpace::seedSize, patSpace::seedSpaceSize, and dnaSeq::size.
Referenced by makePatSpace().
00114 { 00115 int size = seq->size; 00116 int mask = ps->seedSpaceSize-1; 00117 DNA *dna = seq->dna; 00118 int i; 00119 int bits = 0; 00120 int bVal; 00121 int ls; 00122 00123 for (i=0; i<ps->seedSize-1; ++i) 00124 { 00125 bVal = ntValNoN[(int)dna[i]]; 00126 bits <<= 2; 00127 bits += bVal; 00128 } 00129 for (i=ps->seedSize-1; i<size; ++i) 00130 { 00131 bVal = ntValNoN[(int)dna[i]]; 00132 bits <<= 2; 00133 bits += bVal; 00134 bits &= mask; 00135 ls = ps->listSizes[bits]; 00136 if (ls < maxPatCount) 00137 ps->listSizes[bits] = ls+1; 00138 } 00139 }
Here is the caller graph for this function:

| static void fixBlockSize | ( | struct blockPos * | bp, | |
| int | blockIx | |||
| ) | [static] |
Definition at line 85 of file patSpace.c.
References blockSize, blockPos::offset, blockPos::seq, blockPos::size, and dnaSeq::size.
Referenced by addToPatSpace().
00087 { 00088 struct dnaSeq *seq = bp->seq; 00089 int size = seq->size - bp->offset; 00090 if (size > blockSize) 00091 size = blockSize; 00092 bp->size = size; 00093 }
Here is the caller graph for this function:

| void freePatSpace | ( | struct patSpace ** | pPatSpace | ) |
Definition at line 68 of file patSpace.c.
References patSpace::allocated, freeMem(), and freez().
00070 { 00071 struct patSpace *ps = *pPatSpace; 00072 if (ps != NULL) 00073 { 00074 freeMem(ps->allocated); 00075 freez(pPatSpace); 00076 } 00077 }
Here is the call graph for this function:

| struct patSpace* makePatSpace | ( | struct dnaSeq ** | seqArray, | |
| int | seqArrayCount, | |||
| int | seedSize, | |||
| char * | oocFileName, | |||
| int | minMatch, | |||
| int | maxGap | |||
| ) | [read] |
Definition at line 247 of file patSpace.c.
References addToPatSpace(), allocPatSpaceLists(), patSpace::blocksUsed, countPatSpace(), patSpace::listSizes, maxBlockCount, patSpace::maxPat, maxPatCount, newPatSpace(), dnaSeq::next, oocMaskCounts(), oocMaskSimpleRepeats(), psBits, patSpace::seedSize, patSpace::seedSpaceSize, dnaSeq::size, and tooManyBlocks().
00258 { 00259 struct patSpace *ps = newPatSpace(minMatch, maxGap,seedSize); 00260 int i; 00261 int startIx = 0; 00262 int total = 0; 00263 struct dnaSeq *seq; 00264 psBits maxPat; 00265 psBits *listSizes; 00266 int seedSpaceSize = ps->seedSpaceSize; 00267 00268 maxPat = ps->maxPat = maxPatCount; 00269 for (i=0; i<seqArrayCount; ++i) 00270 { 00271 for (seq = seqArray[i]; seq != NULL; seq = seq->next) 00272 { 00273 total += seq->size; 00274 countPatSpace(ps, seq); 00275 } 00276 } 00277 00278 listSizes = ps->listSizes; 00279 00280 /* Scan through over-popular patterns and set their count to value 00281 * where they won't be added to pat space. */ 00282 oocMaskCounts(oocFileName, listSizes, ps->seedSize, maxPat); 00283 00284 /* Get rid of simple repeats as well. */ 00285 oocMaskSimpleRepeats(listSizes, ps->seedSize, maxPat); 00286 00287 00288 allocPatSpaceLists(ps); 00289 00290 /* Zero out pattern counts that aren't oversubscribed. */ 00291 for (i=0; i<ps->seedSpaceSize; ++i) 00292 { 00293 if (listSizes[i] < maxPat) 00294 listSizes[i] = 0; 00295 } 00296 for (i=0; i<seqArrayCount; ++i) 00297 { 00298 int j; 00299 for (seq = seqArray[i], j=0; seq != NULL; seq = seq->next, ++j) 00300 { 00301 startIx = addToPatSpace(ps, i, j, seq, startIx); 00302 if (startIx >= maxBlockCount) 00303 tooManyBlocks(); 00304 } 00305 } 00306 ps->blocksUsed = startIx; 00307 00308 /* Zero local over-popular patterns. */ 00309 for (i=0; i<seedSpaceSize; ++i) 00310 { 00311 if (listSizes[i] >= maxPat) 00312 listSizes[i] = 0; 00313 } 00314 00315 return ps; 00316 }
Here is the call graph for this function:

| static struct patClump* newClump | ( | struct patSpace * | ps, | |
| struct blockPos * | first, | |||
| struct blockPos * | last | |||
| ) | [static, read] |
Definition at line 327 of file patSpace.c.
References AllocVar, patClump::bacIx, blockPos::bacIx, blockSize, blockPos::offset, patClump::seq, blockPos::seq, patClump::seqIx, blockPos::seqIx, patClump::size, dnaSeq::size, blockPos::size, and patClump::start.
Referenced by clumpHits(), and targetClump().
00329 { 00330 struct dnaSeq *seq = first->seq; 00331 int seqIx = first->seqIx; 00332 int bacIx = first->bacIx; 00333 int start = first->offset; 00334 int end = last->offset+last->size; 00335 int extraAtEnds = blockSize/2; 00336 struct patClump *cl; 00337 00338 start -= extraAtEnds; 00339 if (start < 0) 00340 start = 0; 00341 end += extraAtEnds; 00342 if (end >seq->size) 00343 end = seq->size; 00344 AllocVar(cl); 00345 cl->bacIx = bacIx; 00346 cl->seqIx = seqIx; 00347 cl->seq = seq; 00348 cl->start = start; 00349 cl->size = end-start; 00350 return cl; 00351 }
Here is the caller graph for this function:

| static struct patSpace* newPatSpace | ( | int | minMatch, | |
| int | maxGap, | |||
| int | seedSize | |||
| ) | [static, read] |
Definition at line 95 of file patSpace.c.
References AllocVar, patSpace::lists, patSpace::listSizes, patSpace::maxGap, patSpace::minMatch, needLargeZeroedMem(), patSpace::seedSize, and patSpace::seedSpaceSize.
Referenced by makePatSpace().
00097 { 00098 struct patSpace *ps; 00099 int seedBitSize = seedSize*2; 00100 int seedSpaceSize; 00101 00102 AllocVar(ps); 00103 ps->seedSize = seedSize; 00104 seedSpaceSize = ps->seedSpaceSize = (1<<seedBitSize); 00105 ps->lists = needLargeZeroedMem(seedSpaceSize * sizeof(ps->lists[0])); 00106 ps->listSizes = needLargeZeroedMem(seedSpaceSize * sizeof(ps->listSizes[0])); 00107 ps->minMatch = minMatch; 00108 ps->maxGap = maxGap; 00109 return ps; 00110 }
Here is the call graph for this function:

Here is the caller graph for this function:

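| struct patClump* patSpaceFindOne | ( | struct patSpace * | ps, | |
| DNA * | dna, | |||
| int | dnaSize | |||
| ) | [read] |
Definition at line 392 of file patSpace.c.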
References patSpace::blocksUsed, patSpace::hitBlocks, patSpace::lists, patSpace::listSizes, minMatch, patSpace::minMatch, ntValNoN, patSpace::posBuf, psBits, and patSpace::seedSize.
Referenced by ssFindBundles().
00394 { 00395 int lastStart = dnaSize - ps->seedSize; 00396 int i,j; 00397 int pat; 00398 int hitBlockCount = 0; 00399 int totalSigHits = 0; 00400 DNA *tile = dna; 00401 int blocksUsed = ps->blocksUsed; 00402 int *posBuf = ps->posBuf; 00403 int *hitBlocks = ps->hitBlocks; 00404 int minMatch = ps->minMatch; 00405 00406 memset(ps->posBuf, 0, sizeof(ps->posBuf[0]) * blocksUsed); 00407 for (i=0; i<=lastStart; i += ps->seedSize) 00408 { 00409 psBits *list; 00410 psBits count; 00411 00412 pat = 0; 00413 for (j=0; j<ps->seedSize; ++j) 00414 { 00415 int bVal = ntValNoN[(int)tile[j]]; 00416 pat <<= 2; 00417 pat += bVal; 00418 } 00419 list = ps->lists[pat]; 00420 if ((count = ps->listSizes[pat]) > 0) 00421 { 00422 for (j=0; j<count; ++j) 00423 posBuf[list[j]] += 1; 00424 } 00425 tile += ps->seedSize; 00426 } 00427 00428 /* Scan through array that records counts of hits at positions. */ 00429 for (i=0; i<blocksUsed-1; ++i) 00430 { 00431 /* Save significant hits in a more compact array */ 00432 int a = posBuf[i], b = posBuf[i+1]; 00433 int sum = a + b; 00434 if (sum >= minMatch) 00435 { 00436 if (a > 0) 00437 { 00438 if (hitBlockCount == 0 || hitBlocks[hitBlockCount-1] != i) 00439 { 00440 hitBlocks[hitBlockCount++] = i; 00441 totalSigHits += a; 00442 } 00443 } 00444 if (b > 0) 00445 { 00446 hitBlocks[hitBlockCount++] = i+1; 00447 totalSigHits += b; 00448 } 00449 } 00450 } 00451 00452 /* Output data with significant hits. */ 00453 if (hitBlockCount > 0 && totalSigHits*ps->seedSize*8 > dnaSize) 00454 { 00455 return clumpHits(ps, hitBlocks, hitBlockCount, posBuf, dna, 00456 dnaSize); 00457 } 00458 else 00459 return NULL; 00460 }
Here is the caller graph for this function:

| void tooManyBlocks | ( | ) |
Definition at line 79 of file patSpace.c.
References errAbort(), and maxBlockCount.
Referenced by addToPatSpace(), and makePatSpace().
00081 { 00082 errAbort("Too many blocks, can only handle %d\n", maxBlockCount); 00083 }
Here is the call graph for this function:

Here is the caller graph for this function:

char const rcsid[] = "$Id: patSpace.c,v 1.3 2006/03/15 18:36:16 angie Exp $" [static] |
Definition at line 10 of file patSpace.c.
1.5.2