inc/twoBit.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  twoBit
struct  twoBitIndex
struct  twoBitFile
struct  twoBitSpec
struct  twoBitSeqSpec

Functions

twoBitFiletwoBitOpen (char *fileName)
void twoBitClose (struct twoBitFile **pTbf)
int twoBitSeqSize (struct twoBitFile *tbf, char *name)
long long twoBitTotalSize (struct twoBitFile *tbf)
dnaSeqtwoBitReadSeqFragExt (struct twoBitFile *tbf, char *name, int fragStart, int fragEnd, boolean doMask, int *retFullSize)
dnaSeqtwoBitReadSeqFrag (struct twoBitFile *tbf, char *name, int fragStart, int fragEnd)
dnaSeqtwoBitReadSeqFragLower (struct twoBitFile *tbf, char *name, int fragStart, int fragEnd)
dnaSeqtwoBitLoadAll (char *spec)
slNametwoBitSeqNames (char *fileName)
twoBittwoBitFromDnaSeq (struct dnaSeq *seq, boolean doMask)
twoBittwoBitFromFile (char *fileName)
void twoBitWriteOne (struct twoBit *twoBit, FILE *f)
void twoBitWriteHeader (struct twoBit *twoBitList, FILE *f)
boolean twoBitIsFile (char *fileName)
boolean twoBitParseRange (char *rangeSpec, char **retFile, char **retSeq, int *retStart, int *retEnd)
boolean twoBitIsRange (char *rangeSpec)
boolean twoBitIsFileOrRange (char *spec)
boolean twoBitIsSpec (char *spec)
twoBitSpectwoBitSpecNew (char *specStr)
twoBitSpectwoBitSpecNewFile (char *twoBitFile, char *specFile)
void twoBitSpecFree (struct twoBitSpec **specPtr)
void twoBitOutNBeds (struct twoBitFile *tbf, char *seqName, FILE *outF)
int twoBitSeqSizeNoNs (struct twoBitFile *tbf, char *seqName)


Function Documentation

void twoBitClose ( struct twoBitFile **  pTbf  ) 

Definition at line 263 of file twoBit.c.

References carefulClose(), twoBitFile::f, twoBitFile::fileName, freez(), twoBitFile::hash, and hashFree.

Referenced by dnaLoadSingle(), dnaLoadStackFree(), gfCountTilesInTwoBit(), gfFileCacheFreeEl(), gfIndexNibsAndTwoBits(), gfIndexTransNibsAndTwoBits(), nibTwoCacheFree(), nibTwoLoadOne(), readSeqFrag(), searchOneIndex(), twoBitFromFile(), twoBitLoadAll(), and twoBitSeqNames().

00265 {
00266 struct twoBitFile *tbf = *pTbf;
00267 if (tbf != NULL)
00268     {
00269     freez(&tbf->fileName);
00270     carefulClose(&tbf->f);
00271     hashFree(&tbf->hash);
00272     /* The indexList is allocated out of the hash's memory pool. */
00273     freez(pTbf);
00274     }
00275 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct twoBit* twoBitFromDnaSeq ( struct dnaSeq seq,
boolean  doMask 
) [read]

Definition at line 122 of file twoBit.c.

References AllocArray, AllocVar, cloneString(), countBlocksOfLower(), countBlocksOfN(), twoBit::data, dnaSeq::dna, twoBit::maskBlockCount, twoBit::maskSizes, twoBit::maskStarts, twoBit::name, dnaSeq::name, twoBit::nBlockCount, twoBit::nSizes, twoBit::nStarts, packDna4(), packedSize(), dnaSeq::size, twoBit::size, storeBlocksOfLower(), storeBlocksOfN(), and UBYTE.

00125 {
00126 int ubyteSize = packedSize(seq->size);
00127 UBYTE *pt;
00128 struct twoBit *twoBit;
00129 DNA last4[4];   /* Holds few bases. */
00130 DNA *dna;
00131 int i, end;
00132 
00133 /* Allocate structure and fill in name. */
00134 AllocVar(twoBit);
00135 pt = AllocArray(twoBit->data, ubyteSize);
00136 twoBit->name = cloneString(seq->name);
00137 twoBit->size = seq->size;
00138 
00139 /* Convert to 4-bases per byte representation. */
00140 dna = seq->dna;
00141 end = seq->size - 4;
00142 for (i=0; i<end; i += 4)
00143     {
00144     *pt++ = packDna4(dna+i);
00145     }
00146 
00147 /* Take care of conversion of last few bases. */
00148 last4[0] = last4[1] = last4[2] = last4[3] = 'T';
00149 memcpy(last4, dna+i, seq->size-i);
00150 *pt = packDna4(last4);
00151 
00152 /* Deal with blocks of N. */
00153 twoBit->nBlockCount = countBlocksOfN(dna, seq->size);
00154 if (twoBit->nBlockCount > 0)
00155     {
00156     AllocArray(twoBit->nStarts, twoBit->nBlockCount);
00157     AllocArray(twoBit->nSizes, twoBit->nBlockCount);
00158     storeBlocksOfN(dna, seq->size, twoBit->nStarts, twoBit->nSizes);
00159     }
00160 
00161 /* Deal with masking */
00162 if (doMask)
00163     {
00164     twoBit->maskBlockCount = countBlocksOfLower(dna, seq->size);
00165     if (twoBit->maskBlockCount > 0)
00166         {
00167         AllocArray(twoBit->maskStarts, twoBit->maskBlockCount);
00168         AllocArray(twoBit->maskSizes, twoBit->maskBlockCount);
00169         storeBlocksOfLower(dna, seq->size, 
00170                 twoBit->maskStarts, twoBit->maskSizes);
00171         }
00172     }
00173 return twoBit;
00174 }

Here is the call graph for this function:

struct twoBit* twoBitFromFile ( char *  fileName  )  [read]

Definition at line 404 of file twoBit.c.

References AllocVar, bits32, cloneString(), twoBitFile::f, twoBitFile::indexList, twoBitFile::isSwapped, mustRead(), name, twoBitIndex::name, needLargeMem(), twoBitIndex::next, packedSize(), readBits32(), readBlockCoords(), slAddHead, slReverse(), twoBitClose(), twoBitOpen(), and twoBitSeekTo().

00406 {
00407 struct twoBitFile *tbf = twoBitOpen(fileName);
00408 struct twoBitIndex *index;
00409 struct twoBit *twoBitList = NULL, *twoBit = NULL;
00410 boolean isSwapped = tbf->isSwapped;
00411 FILE *f = tbf->f;
00412 
00413 for (index = tbf->indexList; index != NULL; index = index->next)
00414     {
00415     char *name = index->name;
00416     bits32 packByteCount;
00417 
00418     AllocVar(twoBit);
00419     twoBit->name = cloneString(name);
00420 
00421     /* Find offset in index and seek to it */
00422     twoBitSeekTo(tbf, name);
00423 
00424     /* Read in seqSize. */
00425     twoBit->size = readBits32(f, isSwapped);
00426 
00427     /* Read in blocks of N. */
00428     readBlockCoords(f, isSwapped, &(twoBit->nBlockCount),
00429                     &(twoBit->nStarts), &(twoBit->nSizes));
00430 
00431     /* Read in masked blocks. */
00432     readBlockCoords(f, isSwapped, &(twoBit->maskBlockCount),
00433                     &(twoBit->maskStarts), &(twoBit->maskSizes));
00434 
00435     /* Reserved word. */
00436     twoBit->reserved = readBits32(f, isSwapped);
00437 
00438     /* Read in data. */
00439     packByteCount = packedSize(twoBit->size);
00440     twoBit->data = needLargeMem(packByteCount);
00441     mustRead(f, twoBit->data, packByteCount);
00442 
00443     slAddHead(&twoBitList, twoBit);
00444     }
00445 
00446 twoBitClose(&tbf);
00447 slReverse(&twoBitList);
00448 return twoBitList;
00449 }

Here is the call graph for this function:

boolean twoBitIsFile ( char *  fileName  ) 

Definition at line 711 of file twoBit.c.

References endsWith().

Referenced by dnaLoadStackNew(), gfIndexNibsAndTwoBits(), gfIndexTransNibsAndTwoBits(), gfMakeOoc(), nibTwoCacheNew(), nibTwoLoadOne(), twoBitIsFileOrRange(), and twoBitIsRange().

00713 {
00714 return endsWith(fileName, ".2bit");
00715 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean twoBitIsFileOrRange ( char *  spec  ) 

Definition at line 794 of file twoBit.c.

References twoBitIsFile(), and twoBitIsRange().

00796 {
00797 return twoBitIsFile(spec) || twoBitIsRange(spec);
00798 }

Here is the call graph for this function:

boolean twoBitIsRange ( char *  rangeSpec  ) 

Definition at line 781 of file twoBit.c.

References cloneString(), freeMem(), twoBitIsFile(), and twoBitParseRange().

Referenced by dnaLoadSingle(), and twoBitIsFileOrRange().

00783 {
00784 char *dupe = cloneString(rangeSpec);
00785 char *file, *seq;
00786 int start, end;
00787 boolean isRange = twoBitParseRange(dupe, &file, &seq, &start, &end);
00788 if (isRange)
00789     isRange = twoBitIsFile(file);
00790 freeMem(dupe);
00791 return isRange;
00792 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean twoBitIsSpec ( char *  spec  ) 

Definition at line 830 of file twoBit.c.

References twoBitSpecFree(), and twoBitSpecNew().

Referenced by gfClientFileArray(), gfClientSeqList(), and searchOneIndex().

00832 {
00833 struct twoBitSpec *tbs = twoBitSpecNew(spec);
00834 boolean isSpec = (tbs != NULL);
00835 twoBitSpecFree(&tbs);
00836 return isSpec;
00837 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* twoBitLoadAll ( char *  spec  )  [read]

Definition at line 662 of file twoBit.c.

References twoBitSeqSpec::end, twoBitSpec::fileName, twoBitFile::indexList, twoBitSeqSpec::name, twoBitIndex::name, twoBitSeqSpec::next, twoBitIndex::next, twoBitSpec::seqs, slReverse(), slSafeAddHead(), twoBitSeqSpec::start, twoBitClose(), twoBitOpen(), twoBitReadSeqFrag(), twoBitSpecFree(), and twoBitSpecNew().

Referenced by gfClientSeqList(), and gfMakeOoc().

00664            :
00665  *
00666  *    file/path/input.2bit[:seqSpec1][,seqSpec2,...]
00667  *
00668  * where seqSpec is either
00669  *     seqName
00670  *  or
00671  *     seqName:start-end */
00672 {
00673 struct twoBitSpec *tbs = twoBitSpecNew(spec);
00674 struct twoBitFile *tbf = twoBitOpen(tbs->fileName);
00675 struct dnaSeq *list = NULL;
00676 if (tbs->seqs != NULL)
00677     {
00678     struct twoBitSeqSpec *tbss;
00679     for (tbss = tbs->seqs; tbss != NULL; tbss = tbss->next)
00680         slSafeAddHead(&list, twoBitReadSeqFrag(tbf, tbss->name,
00681                                                tbss->start, tbss->end));
00682     }
00683 else
00684     {
00685     struct twoBitIndex *index;
00686     for (index = tbf->indexList; index != NULL; index = index->next)
00687         slSafeAddHead(&list, twoBitReadSeqFrag(tbf, index->name, 0, 0));
00688     }
00689 slReverse(&list);
00690 twoBitClose(&tbf);
00691 twoBitSpecFree(&tbs);
00692 return list;
00693 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct twoBitFile* twoBitOpen ( char *  fileName  )  [read]

Definition at line 287 of file twoBit.c.

References AllocVar, bits32, cloneString(), digitsBaseTwo(), errAbort(), twoBitFile::f, FALSE, fastReadString(), twoBitFile::fileName, twoBitFile::hash, hashAddSaveName(), hashNew, twoBitFile::indexList, twoBitFile::isSwapped, hash::lm, lmAllocVar, mustOpen(), mustReadOne, name, twoBitIndex::name, twoBitIndex::offset, readBits32(), twoBitFile::reserved, twoBitFile::seqCount, slAddHead, slReverse(), TRUE, twoBitSig, twoBitSwapSig, and twoBitFile::version.

Referenced by dnaLoadSingle(), dnaLoadStackNew(), gfCountTilesInTwoBit(), gfiExpandAndLoadCached(), gfIndexNibsAndTwoBits(), gfIndexTransNibsAndTwoBits(), nibTwoCacheNew(), nibTwoLoadOne(), readSeqFrag(), searchOneIndex(), twoBitFromFile(), twoBitLoadAll(), and twoBitSeqNames().

00290 {
00291 bits32 sig;
00292 struct twoBitFile *tbf;
00293 struct twoBitIndex *index;
00294 boolean isSwapped = FALSE;
00295 int i;
00296 struct hash *hash;
00297 FILE *f = mustOpen(fileName, "rb");
00298 
00299 /* Allocate header verify signature, and read in
00300  * the constant-length bits. */
00301 AllocVar(tbf);
00302 mustReadOne(f, sig);
00303 if (sig == twoBitSwapSig)
00304     isSwapped = tbf->isSwapped = TRUE;
00305 else if (sig != twoBitSig)
00306     errAbort("%s doesn't have a valid twoBitSig", fileName);
00307 tbf->fileName = cloneString(fileName);
00308 tbf->f = f;
00309 tbf->version = readBits32(f, isSwapped);
00310 if (tbf->version != 0)
00311     {
00312     errAbort("Can only handle version 0 of this file. This is version %d",
00313         (int)tbf->version);
00314     }
00315 tbf->seqCount = readBits32(f, isSwapped);
00316 tbf->reserved = readBits32(f, isSwapped);
00317 
00318 /* Read in index. */
00319 hash = tbf->hash = hashNew(digitsBaseTwo(tbf->seqCount));
00320 for (i=0; i<tbf->seqCount; ++i)
00321     {
00322     char name[256];
00323     if (!fastReadString(f, name))
00324         errAbort("%s is truncated", fileName);
00325     lmAllocVar(hash->lm, index);
00326     index->offset = readBits32(f, isSwapped);
00327     hashAddSaveName(hash, name, index, &index->name);
00328     slAddHead(&tbf->indexList, index);
00329     }
00330 slReverse(&tbf->indexList);
00331 return tbf;
00332 }

Here is the call graph for this function:

Here is the caller graph for this function:

void twoBitOutNBeds ( struct twoBitFile tbf,
char *  seqName,
FILE *  outF 
)

Definition at line 933 of file twoBit.c.

References AllocArray, bits32, byteSwap32(), twoBitFile::f, freez(), twoBitFile::isSwapped, readBits32(), and twoBitSeekTo().

00935 {
00936 int nBlockCount;
00937 
00938 twoBitSeekTo(tbf, seqName);
00939 
00940 readBits32(tbf->f, tbf->isSwapped);
00941 
00942 /* Read in blocks of N. */
00943 nBlockCount = readBits32(tbf->f, tbf->isSwapped);
00944 
00945 if (nBlockCount > 0)
00946     {
00947     bits32 *nStarts = NULL, *nSizes = NULL;
00948     int i;
00949 
00950     AllocArray(nStarts, nBlockCount);
00951     AllocArray(nSizes, nBlockCount);
00952     fread(nStarts, sizeof(nStarts[0]), nBlockCount, tbf->f);
00953     fread(nSizes, sizeof(nSizes[0]), nBlockCount, tbf->f);
00954     if (tbf->isSwapped)
00955         {
00956         for (i=0; i<nBlockCount; ++i)
00957             {
00958             nStarts[i] = byteSwap32(nStarts[i]);
00959             nSizes[i] = byteSwap32(nSizes[i]);
00960             }
00961         }
00962 
00963     for (i=0; i<nBlockCount; ++i)
00964         {
00965         fprintf(outF, "%s\t%d\t%d\n", seqName, nStarts[i], nStarts[i] + nSizes[i]);
00966         }
00967 
00968     freez(&nStarts);
00969     freez(&nSizes);
00970     }
00971 }

Here is the call graph for this function:

boolean twoBitParseRange ( char *  rangeSpec,
char **  retFile,
char **  retSeq,
int *  retStart,
int *  retEnd 
)

Definition at line 717 of file twoBit.c.

References FALSE, and TRUE.

Referenced by dnaLoadSingle(), and twoBitIsRange().

00720                     :seqName:start-end
00721  * or
00722  *    file/path/name:seqName
00723  * or
00724  *    file/path/name:seqName1,seqName2,seqName3,...
00725  * This will destroy the input 'rangeSpec' in the process.  Returns FALSE if
00726  * it doesn't fit this format, setting retFile to rangeSpec, and retSet to
00727  * null.  If it is the shorter form then start and end will both be returned
00728  * as zero, which is ok by twoBitReadSeqFrag.  Any of the return arguments
00729  * maybe NULL.
00730  */
00731 {
00732 char *s, *e;
00733 int n;
00734 
00735 /* default returns */
00736 if (retFile != NULL)
00737     *retFile = rangeSpec;
00738 if (retSeq != NULL)
00739     *retSeq = NULL;
00740 if (retStart != NULL)
00741     *retStart = 0;
00742 if (retEnd != NULL)
00743     *retEnd = 0;
00744 
00745 /* start with final name  */
00746 s = strrchr(rangeSpec, '/');
00747 if (s == NULL)
00748     s = rangeSpec;
00749 else
00750     s++;
00751 
00752 /* Grab seqName, zero terminate fileName. */
00753 s = strchr(s, ':');
00754 if (s == NULL)
00755     return FALSE;
00756 *s++ = 0;
00757 if (retSeq != NULL)
00758     *retSeq = s;
00759 
00760 /* Grab start, zero terminate seqName. */
00761 s = strchr(s, ':');
00762 if (s == NULL)
00763     return TRUE;  /* no range spec */
00764 *s++ = 0;
00765 n = strtol(s, &e, 0);
00766 if (*e != '-')
00767     return FALSE; /* not a valid range */
00768 if (retStart != NULL)
00769     *retStart = n;
00770 s = e+1;
00771 
00772 /* Grab end. */
00773 n = strtol(s, &e, 0);
00774 if (*e != '\0')
00775     return FALSE; /* not a valid range */
00776 if (retEnd != NULL)
00777     *retEnd = n;
00778 return TRUE;
00779 }

Here is the caller graph for this function:

struct dnaSeq* twoBitReadSeqFrag ( struct twoBitFile tbf,
char *  name,
int  fragStart,
int  fragEnd 
) [read]

Definition at line 625 of file twoBit.c.

References TRUE, and twoBitReadSeqFragExt().

Referenced by dnaLoadNextFromStack(), dnaLoadSingle(), nibTwoCacheSeq(), nibTwoLoadOne(), readMaskedTwoBit(), searchOneIndex(), and twoBitLoadAll().

00631 {
00632 return twoBitReadSeqFragExt(tbf, name, fragStart, fragEnd, TRUE, NULL);
00633 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* twoBitReadSeqFragExt ( struct twoBitFile tbf,
char *  name,
int  fragStart,
int  fragEnd,
boolean  doMask,
int *  retFullSize 
) [read]

Definition at line 451 of file twoBit.c.

References AllocVar, bits32, cloneString(), dnaSeq::dna, dnaUtilOpen(), errAbort(), twoBitFile::f, findGreatestLowerBound(), freez(), twoBitFile::isSwapped, mustRead(), dnaSeq::name, needLargeMem(), readBits32(), readBlockCoords(), safef(), SEEK_CUR, dnaSeq::size, toLowerN(), toUpperN(), twoBitSeekTo(), UBYTE, and valToNt.

Referenced by nibTwoCacheSeqPartExt(), twoBitReadSeqFrag(), and twoBitReadSeqFragLower().

00457 {
00458 struct dnaSeq *seq;
00459 bits32 seqSize;
00460 bits32 nBlockCount, maskBlockCount;
00461 bits32 *nStarts = NULL, *nSizes = NULL;
00462 bits32 *maskStarts = NULL, *maskSizes = NULL;
00463 boolean isSwapped = tbf->isSwapped;
00464 FILE *f = tbf->f;
00465 int i;
00466 int packByteCount, packedStart, packedEnd, remainder, midStart, midEnd;
00467 int outSize;
00468 UBYTE *packed, *packedAlloc;
00469 DNA *dna;
00470 
00471 /* Find offset in index and seek to it */
00472 dnaUtilOpen();
00473 twoBitSeekTo(tbf, name);
00474 
00475 /* Read in seqSize. */
00476 seqSize = readBits32(f, isSwapped);
00477 if (fragEnd == 0)
00478     fragEnd = seqSize;
00479 if (fragEnd > seqSize)
00480     errAbort("twoBitReadSeqFrag in %s end (%d) >= seqSize (%d)", name, fragEnd, seqSize);
00481 outSize = fragEnd - fragStart;
00482 if (outSize < 1)
00483     errAbort("twoBitReadSeqFrag in %s start (%d) >= end (%d)", name, fragStart, fragEnd);
00484 
00485 /* Read in blocks of N. */
00486 readBlockCoords(f, isSwapped, &nBlockCount, &nStarts, &nSizes);
00487 
00488 /* Read in masked blocks. */
00489 readBlockCoords(f, isSwapped, &maskBlockCount, &maskStarts, &maskSizes);
00490 
00491 /* Skip over reserved word. */
00492 readBits32(f, isSwapped);
00493 
00494 /* Allocate dnaSeq, and fill in zero tag at end of sequence. */
00495 AllocVar(seq);
00496 if (outSize == seqSize)
00497     seq->name = cloneString(name);
00498 else
00499     {
00500     char buf[256*2];
00501     safef(buf, sizeof(buf), "%s:%d-%d", name, fragStart, fragEnd);
00502     seq->name = cloneString(buf);
00503     }
00504 seq->size = outSize;
00505 dna = seq->dna = needLargeMem(outSize+1);
00506 seq->dna[outSize] = 0;
00507 
00508 
00509 /* Skip to bits we need and read them in. */
00510 packedStart = (fragStart>>2);
00511 packedEnd = ((fragEnd+3)>>2);
00512 packByteCount = packedEnd - packedStart;
00513 packed = packedAlloc = needLargeMem(packByteCount);
00514 fseek(f, packedStart, SEEK_CUR);
00515 mustRead(f, packed, packByteCount);
00516 
00517 /* Handle case where everything is in one packed byte */
00518 if (packByteCount == 1)
00519     {
00520     int pOff = (packedStart<<2);
00521     int pStart = fragStart - pOff;
00522     int pEnd = fragEnd - pOff;
00523     UBYTE partial = *packed;
00524     assert(pEnd <= 4);
00525     assert(pStart >= 0);
00526     for (i=pStart; i<pEnd; ++i)
00527         *dna++ = valToNt[(partial >> (6-i-i)) & 3];
00528     }
00529 else
00530     {
00531     /* Handle partial first packed byte. */
00532     midStart = fragStart;
00533     remainder = (fragStart&3);
00534     if (remainder > 0)
00535         {
00536         UBYTE partial = *packed++;
00537         int partCount = 4 - remainder;
00538         for (i=partCount-1; i>=0; --i)
00539             {
00540             dna[i] = valToNt[partial&3];
00541             partial >>= 2;
00542             }
00543         midStart += partCount;
00544         dna += partCount;
00545         }
00546 
00547     /* Handle middle bytes. */
00548     remainder = fragEnd&3;
00549     midEnd = fragEnd - remainder;
00550     for (i=midStart; i<midEnd; i += 4)
00551         {
00552         UBYTE b = *packed++;
00553         dna[3] = valToNt[b&3];
00554         b >>= 2;
00555         dna[2] = valToNt[b&3];
00556         b >>= 2;
00557         dna[1] = valToNt[b&3];
00558         b >>= 2;
00559         dna[0] = valToNt[b&3];
00560         dna += 4;
00561         }
00562 
00563     if (remainder >0)
00564         {
00565         UBYTE part = *packed;
00566         part >>= (8-remainder-remainder);
00567         for (i=remainder-1; i>=0; --i)
00568             {
00569             dna[i] = valToNt[part&3];
00570             part >>= 2;
00571             }
00572         }
00573     }
00574 freez(&packedAlloc);
00575 
00576 if (nBlockCount > 0)
00577     {
00578     int startIx = findGreatestLowerBound(nBlockCount, nStarts, fragStart);
00579     for (i=startIx; i<nBlockCount; ++i)
00580         {
00581         int s = nStarts[i];
00582         int e = s + nSizes[i];
00583         if (s >= fragEnd)
00584             break;
00585         if (s < fragStart)
00586            s = fragStart;
00587         if (e > fragEnd)
00588            e = fragEnd;
00589         if (s < e)
00590             memset(seq->dna + s - fragStart, 'n', e - s);
00591         }
00592     }
00593 
00594 if (doMask)
00595     {
00596     toUpperN(seq->dna, seq->size);
00597     if (maskBlockCount > 0)
00598         {
00599         int startIx = findGreatestLowerBound(maskBlockCount, maskStarts, 
00600                 fragStart);
00601         for (i=startIx; i<maskBlockCount; ++i)
00602             {
00603             int s = maskStarts[i];
00604             int e = s + maskSizes[i];
00605             if (s >= fragEnd)
00606                 break;
00607             if (s < fragStart)
00608                 s = fragStart;
00609             if (e > fragEnd)
00610                 e = fragEnd;
00611             if (s < e)
00612                 toLowerN(seq->dna + s - fragStart, e - s);
00613             }
00614         }
00615     }
00616 freez(&nStarts);
00617 freez(&nSizes);
00618 freez(&maskStarts);
00619 freez(&maskSizes);
00620 if (retFullSize != NULL)
00621     *retFullSize = seqSize;
00622 return seq;
00623 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* twoBitReadSeqFragLower ( struct twoBitFile tbf,
char *  name,
int  fragStart,
int  fragEnd 
) [read]

Definition at line 635 of file twoBit.c.

References FALSE, and twoBitReadSeqFragExt().

Referenced by gfCountTilesInTwoBit(), gfiExpandAndLoadCached(), gfIndexNibsAndTwoBits(), readMaskedTwoBit(), and readSeqFrag().

00638 {
00639 return twoBitReadSeqFragExt(tbf, name, fragStart, fragEnd, FALSE, NULL);
00640 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct slName* twoBitSeqNames ( char *  fileName  )  [read]

Definition at line 695 of file twoBit.c.

References twoBitFile::indexList, name, twoBitIndex::name, twoBitIndex::next, slAddHead, slNameNew, slReverse(), twoBitClose(), and twoBitOpen().

00697 {
00698 struct twoBitFile *tbf = twoBitOpen(fileName);
00699 struct twoBitIndex *index;
00700 struct slName *name, *list = NULL;
00701 for (index = tbf->indexList; index != NULL; index = index->next)
00702     {
00703     name = slNameNew(index->name);
00704     slAddHead(&list, name);
00705     }
00706 twoBitClose(&tbf);
00707 slReverse(&list);
00708 return list;
00709 }

Here is the call graph for this function:

int twoBitSeqSize ( struct twoBitFile tbf,
char *  name 
)

Definition at line 642 of file twoBit.c.

References twoBitFile::f, twoBitFile::isSwapped, readBits32(), and twoBitSeekTo().

Referenced by dnaLoadSingle(), and gfiExpandAndLoadCached().

00644 {
00645 twoBitSeekTo(tbf, name);
00646 return readBits32(tbf->f, tbf->isSwapped);
00647 }

Here is the call graph for this function:

Here is the caller graph for this function:

int twoBitSeqSizeNoNs ( struct twoBitFile tbf,
char *  seqName 
)

Definition at line 973 of file twoBit.c.

References AllocArray, bits32, byteSwap32(), twoBitFile::f, freez(), twoBitFile::isSwapped, readBits32(), and twoBitSeekTo().

00975 {
00976 int nBlockCount;
00977 int size;
00978 
00979 twoBitSeekTo(tbf, seqName);
00980 
00981 size = readBits32(tbf->f, tbf->isSwapped);
00982 
00983 /* Read in blocks of N. */
00984 nBlockCount = readBits32(tbf->f, tbf->isSwapped);
00985 
00986 if (nBlockCount > 0)
00987     {
00988     bits32 *nStarts = NULL, *nSizes = NULL;
00989     
00990     int i;
00991 
00992     AllocArray(nStarts, nBlockCount);
00993     AllocArray(nSizes, nBlockCount);
00994     fread(nStarts, sizeof(nStarts[0]), nBlockCount, tbf->f);
00995     fread(nSizes, sizeof(nSizes[0]), nBlockCount, tbf->f);
00996     if (tbf->isSwapped)
00997         {
00998         for (i=0; i<nBlockCount; ++i)
00999             {
01000             nStarts[i] = byteSwap32(nStarts[i]);
01001             nSizes[i] = byteSwap32(nSizes[i]);
01002             }
01003         }
01004 
01005     for (i=0; i<nBlockCount; ++i)
01006         {
01007         size -= nSizes[i];
01008         }
01009 
01010     freez(&nStarts);
01011     freez(&nSizes);
01012     }
01013 
01014 return(size);
01015 }

Here is the call graph for this function:

void twoBitSpecFree ( struct twoBitSpec **  specPtr  ) 

Definition at line 915 of file twoBit.c.

References twoBitSpec::fileName, freeMem(), twoBitSeqSpec::name, twoBitSpec::seqs, and slPopHead().

Referenced by twoBitIsSpec(), twoBitLoadAll(), and twoBitSpecNew().

00917 {
00918 struct twoBitSpec *spec = *specPtr;
00919 if (spec != NULL)
00920     {
00921     struct twoBitSeqSpec *seq;
00922     while ((seq = slPopHead(&spec->seqs)) != NULL)
00923         {
00924         freeMem(seq->name);
00925         freeMem(seq);
00926         }
00927     freeMem(spec->fileName);
00928     freeMem(spec);
00929     *specPtr = NULL;
00930     }
00931 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct twoBitSpec* twoBitSpecNew ( char *  specStr  )  [read]

Definition at line 839 of file twoBit.c.

References AllocArray, AllocVar, chopString(), cloneString(), endsWith(), twoBitSpec::fileName, parseSeqSpec(), twoBitSpec::seqs, slReverse(), slSafeAddHead(), and twoBitSpecFree().

Referenced by searchOneIndex(), twoBitIsSpec(), and twoBitLoadAll().

00841                                    :
00842  *
00843  *    file/path/input.2bit[:seqSpec1][,seqSpec2,...]
00844  *
00845  * where seqSpec is either
00846  *     seqName
00847  *  or
00848  *     seqName:start-end
00849  *
00850  * free result with twoBitSpecFree().
00851  */
00852 {
00853 char *s, *e;
00854 int i, numSeqs;
00855 char **seqSpecs;
00856 struct twoBitSpec *spec;
00857 AllocVar(spec);
00858 spec->fileName = cloneString(specStr);
00859 
00860 /* start with final file name  */
00861 s = strrchr(spec->fileName, '/');
00862 if (s == NULL)
00863     s = spec->fileName;
00864 else
00865     s++;
00866 
00867 /* find end of file name and zero-terminate */
00868 e = strchr(s, ':');
00869 if (e == NULL)
00870     s = NULL; /* just file name */
00871 else
00872     {
00873     *e++ = '\0';
00874     s = e;
00875     }
00876 if (!endsWith(spec->fileName, ".2bit"))
00877     {
00878     twoBitSpecFree(&spec);
00879     return NULL; /* not a 2bit file */
00880     }
00881 
00882 if (s != NULL)
00883     {
00884     /* chop seqs at commas and parse */
00885     numSeqs = chopString(s, ",", NULL, 0);
00886     AllocArray(seqSpecs, numSeqs);
00887     chopString(s, ",", seqSpecs, numSeqs);
00888     for (i = 0; i< numSeqs; i++)
00889         slSafeAddHead(&spec->seqs, parseSeqSpec(seqSpecs[i]));
00890     slReverse(&spec->seqs);
00891     }
00892 return spec;
00893 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct twoBitSpec* twoBitSpecNewFile ( char *  twoBitFile,
char *  specFile 
) [read]

Definition at line 895 of file twoBit.c.

References AllocVar, cloneString(), twoBitSpec::fileName, lineFileClose(), lineFileNextReal(), lineFileOpen(), parseSeqSpec(), twoBitSpec::seqs, slReverse(), slSafeAddHead(), trimSpaces(), and TRUE.

00897                                                                   :
00898  *     seqName
00899  *  or
00900  *     seqName:start-end
00901  */
00902 {
00903 struct lineFile *lf = lineFileOpen(specFile, TRUE);
00904 char *line;
00905 struct twoBitSpec *spec;
00906 AllocVar(spec);
00907 spec->fileName = cloneString(twoBitFile);
00908 while (lineFileNextReal(lf, &line))
00909     slSafeAddHead(&spec->seqs, parseSeqSpec(trimSpaces(line)));
00910 slReverse(&spec->seqs);
00911 lineFileClose(&lf);
00912 return spec;
00913 }

Here is the call graph for this function:

long long twoBitTotalSize ( struct twoBitFile tbf  ) 

Definition at line 649 of file twoBit.c.

References twoBitFile::f, twoBitFile::indexList, twoBitFile::isSwapped, twoBitIndex::next, twoBitIndex::offset, readBits32(), and SEEK_SET.

Referenced by twoBitCheckTotalSize().

00651 {
00652 struct twoBitIndex *index;
00653 long long totalSize = 0;
00654 for (index = tbf->indexList; index != NULL; index = index->next)
00655     {
00656     fseek(tbf->f, index->offset, SEEK_SET);
00657     totalSize += readBits32(tbf->f, tbf->isSwapped);
00658     }
00659 return totalSize;
00660 }

Here is the call graph for this function:

Here is the caller graph for this function:

void twoBitWriteHeader ( struct twoBit twoBitList,
FILE *  f 
)

Definition at line 217 of file twoBit.c.

References bits32, errAbort(), twoBit::name, twoBit::next, twoBit::reserved, twoBit::size, slCount(), twoBitSig, twoBitSizeInFile(), writeOne, and writeString().

00220 {
00221 bits32 sig = twoBitSig;
00222 bits32 version = 0;
00223 bits32 seqCount = slCount(twoBitList);
00224 bits32 reserved = 0;
00225 bits32 offset = 0;
00226 struct twoBit *twoBit;
00227 long long counter = 0; /* check for 32 bit overflow */
00228 
00229 /* Write out fixed parts of header. */
00230 writeOne(f, sig);
00231 writeOne(f, version);
00232 writeOne(f, seqCount);
00233 writeOne(f, reserved);
00234 
00235 /* Figure out location of first byte past index.
00236  * Each index entry contains 4 bytes of offset information
00237  * and the name of the sequence, which is variable length. */
00238 offset = sizeof(sig) + sizeof(version) + sizeof(seqCount) + sizeof(reserved);
00239 for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next)
00240     {
00241     int nameLen = strlen(twoBit->name);
00242     if (nameLen > 255)
00243         errAbort("name %s too long", twoBit->name);
00244     offset += nameLen + 1 + sizeof(bits32);
00245     }
00246 
00247 /* Write out index. */
00248 for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next)
00249     {
00250     int size = twoBitSizeInFile(twoBit);
00251     writeString(f, twoBit->name);
00252     writeOne(f, offset);
00253     offset += size;
00254     counter += (long long)size;
00255     if (counter > UINT_MAX )
00256         errAbort("Error in faToTwoBit, index overflow at %s. The 2bit format "
00257                 "does not support indexes larger than %dGb, \n"
00258                 "please split up into smaller files.\n", 
00259                 twoBit->name, UINT_MAX/1000000000);
00260     }
00261 }

Here is the call graph for this function:

void twoBitWriteOne ( struct twoBit twoBit,
FILE *  f 
)

Definition at line 191 of file twoBit.c.

References twoBit::data, twoBit::maskBlockCount, twoBit::maskSizes, twoBit::maskStarts, mustWrite(), twoBit::nBlockCount, twoBit::nSizes, twoBit::nStarts, packedSize(), twoBit::reserved, twoBit::size, and writeOne.

00195 {
00196 writeOne(f, twoBit->size);
00197 writeOne(f, twoBit->nBlockCount);
00198 if (twoBit->nBlockCount > 0)
00199     {
00200     fwrite(twoBit->nStarts, sizeof(twoBit->nStarts[0]), 
00201         twoBit->nBlockCount, f);
00202     fwrite(twoBit->nSizes, sizeof(twoBit->nSizes[0]), 
00203         twoBit->nBlockCount, f);
00204     }
00205 writeOne(f, twoBit->maskBlockCount);
00206 if (twoBit->maskBlockCount > 0)
00207     {
00208     fwrite(twoBit->maskStarts, sizeof(twoBit->maskStarts[0]), 
00209         twoBit->maskBlockCount, f);
00210     fwrite(twoBit->maskSizes, sizeof(twoBit->maskSizes[0]), 
00211         twoBit->maskBlockCount, f);
00212     }
00213 writeOne(f, twoBit->reserved);
00214 mustWrite(f, twoBit->data, packedSize(twoBit->size));
00215 }

Here is the call graph for this function:


Generated on Tue Dec 25 19:18:28 2007 for blat by  doxygen 1.5.2