lib/nib.c File Reference

#include "common.h"
#include "hash.h"
#include "dnautil.h"
#include "dnaseq.h"
#include "nib.h"
#include "sig.h"

Include dependency graph for nib.c:

Go to the source code of this file.

Data Structures

struct  nibStream

Functions

static char * findNibSubrange (char *fileName)
static void parseSubrange (char *subrange, char *name, unsigned *start, unsigned *end)
void nibParseName (unsigned options, char *fileSpec, char *filePath, char *name, unsigned *start, unsigned *end)
void nibOpenVerify (char *fileName, FILE **retFile, int *retSize)
static struct dnaSeq * nibInput (int options, char *fileName, char *seqName, FILE *f, int seqSize, int start, int size)
static void nibOutput (int options, struct dnaSeq *seq, char *fileName)
struct dnaSeq * nibLdPartMasked (int options, char *fileName, FILE *f, int seqSize, int start, int size)
struct dnaSeq * nibLdPart (char *fileName, FILE *f, int seqSize, int start, int size)
struct dnaSeq * nibLoadPartMasked (int options, char *fileName, int start, int size)
struct dnaSeq * nibLoadPart (char *fileName, int start, int size)
struct dnaSeq * nibLoadAllMasked (int options, char *fileName)
struct dnaSeq * nibLoadAll (char *fileName)
void nibWriteMasked (int options, struct dnaSeq *seq, char *fileName)
void nibWrite (struct dnaSeq *seq, char *fileName)
struct nibStream * nibStreamOpen (char *fileName)
void nibStreamClose (struct nibStream **pNs)
void nibStreamOne (struct nibStream *ns, DNA base)
void nibStreamMany (struct nibStream *ns, DNA *dna, int size)
boolean nibIsFile (char *fileName)
boolean nibIsRange (char *fileName)
struct nibInfo * nibInfoNew (char *path)
void nibInfoFree (struct nibInfo **pNib)
struct nibInfo * nibInfoFromCache (struct hash *hash, char *nibDir, char *nibName)
int nibGetSize (char *nibFile)

Variables

static char const rcsid [] = "$Id: nib.c,v 1.24 2007/03/13 20:43:05 kent Exp $"


Function Documentation

static char* findNibSubrange ( char *  fileName  )  [static]

Definition at line 15 of file nib.c.

Referenced by nibIsFile(), nibIsRange(), and nibParseName().

00018 {
00019 char *baseName = strrchr(fileName, '/');
00020 baseName = (baseName == NULL) ? fileName : baseName+1;
00021 return strchr(baseName, ':');
00022 }

Here is the caller graph for this function:

int nibGetSize ( char *  nibFile  ) 

Definition at line 507 of file nib.c.

References carefulClose(), nibOpenVerify(), and nibInfo::size.

00509 {
00510 FILE* fh;
00511 int size;
00512 
00513 nibOpenVerify(nibFile, &fh, &size);
00514 carefulClose(&fh);
00515 return size;
00516 }

Here is the call graph for this function:

void nibInfoFree ( struct nibInfo **  pNib  ) 

Definition at line 480 of file nib.c.

References carefulClose(), nibInfo::f, nibInfo::fileName, freeMem(), and freez().

Referenced by gfFileCacheFreeEl(), and nibTwoCacheFree().

00482 {
00483 struct nibInfo *nib = *pNib;
00484 if (nib != NULL)
00485     {
00486     carefulClose(&nib->f);
00487     freeMem(nib->fileName);
00488     freez(pNib);
00489     }
00490 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct nibInfo* nibInfoFromCache ( struct hash *  hash,
char *  nibDir,
char *  nibName 
) [read]

Definition at line 492 of file nib.c.

References hashAdd(), hashFindVal(), nibInfoNew(), PATH_LEN, and safef().

Referenced by nibTwoCacheSeq(), and nibTwoCacheSeqPartExt().

00494 {
00495 struct nibInfo *nib;
00496 char path[PATH_LEN];
00497 safef(path, sizeof(path), "%s/%s.nib", nibDir, nibName);
00498 nib = hashFindVal(hash, path);
00499 if (nib == NULL)
00500     {
00501     nib = nibInfoNew(path);
00502     hashAdd(hash, path, nib);
00503     }
00504 return nib;
00505 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct nibInfo* nibInfoNew ( char *  path  )  [read]

Definition at line 470 of file nib.c.

References AllocVar, cloneString(), nibInfo::f, nibInfo::fileName, nibOpenVerify(), and nibInfo::size.

Referenced by gfiExpandAndLoadCached(), and nibInfoFromCache().

00472 {
00473 struct nibInfo *nib;
00474 AllocVar(nib);
00475 nib->fileName = cloneString(path);
00476 nibOpenVerify(path, &nib->f, &nib->size);
00477 return nib;
00478 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct dnaSeq* nibInput ( int  options,
char *  fileName,
char *  seqName,
FILE *  f,
int  seqSize,
int  start,
int  size 
) [static, read]

Definition at line 157 of file nib.c.

References AllocVar, bitAlloc(), bits32, bitSetOne(), cloneString(), dnaSeq::dna, errAbort(), dnaSeq::mask, mask, MASKED_BASE_BIT, dnaSeq::name, needLargeMem(), NIB_MASK_MIXED, SEEK_SET, dnaSeq::size, valToNt, and valToNtMasked.

Referenced by nibLdPartMasked(), and nibLoadAllMasked().

00160 {
00161 int end;
00162 DNA *d;
00163 int bVal;
00164 DNA *valToNtTbl = ((options &  NIB_MASK_MIXED) ? valToNtMasked : valToNt);
00165 struct dnaSeq *seq;
00166 Bits* mask = NULL;
00167 int bytePos, byteSize;
00168 int maskIdx = 0;
00169 
00170 assert(start >= 0);
00171 assert(size >= 0);
00172 
00173 end = start+size;
00174 if (end > seqSize)
00175     errAbort("nib read past end of file (%d %d) in file: %s", 
00176              end, seqSize, (fileName != NULL ? fileName : "(NULL)"));
00177 
00178 AllocVar(seq);
00179 seq->size = size;
00180 seq->name = cloneString(seqName);
00181 seq->dna = d = needLargeMem(size+1);
00182 if (options & NIB_MASK_MIXED)
00183     seq->mask = mask = bitAlloc(size);
00184 
00185 bytePos = (start>>1);
00186 fseek(f, bytePos + 2*sizeof(bits32), SEEK_SET);
00187 if (start & 1)
00188     {
00189     bVal = getc_unlocked(f);
00190     if (bVal < 0)
00191         {
00192         errAbort("Read error 1 in %s", fileName);
00193         }
00194     *d++ = valToNtTbl[(bVal&0xf)];
00195     size -= 1;
00196     if (mask != NULL)
00197         {
00198         if ((bVal&0xf&MASKED_BASE_BIT) == 0)
00199             bitSetOne(mask, maskIdx);
00200         maskIdx++;
00201         }
00202     }
00203 byteSize = (size>>1);
00204 while (--byteSize >= 0)
00205     {
00206     bVal = getc_unlocked(f);
00207     if (bVal < 0)
00208         errAbort("Read error 2 in %s", fileName);
00209     d[0] = valToNtTbl[(bVal>>4)];
00210     d[1] = valToNtTbl[(bVal&0xf)];
00211     d += 2;
00212     if (mask != NULL)
00213         {
00214         if (((bVal>>4)&0xf) == 0)
00215             bitSetOne(mask, maskIdx);
00216         if ((bVal&0xf) == 0)
00217             bitSetOne(mask, maskIdx+1);
00218         maskIdx += 2;
00219         }
00220     }
00221 if (size&1)
00222     {
00223     bVal = getc_unlocked(f);
00224     if (bVal < 0)
00225         errAbort("Read error 3 in %s", fileName);
00226     *d++ = valToNtTbl[(bVal>>4)];
00227     if (mask != NULL)
00228         {
00229         if ((bVal>>4) == 0)
00230             bitSetOne(mask, maskIdx);
00231         maskIdx++;
00232         }
00233     }
00234 *d = 0;
00235 return seq;
00236 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean nibIsFile ( char *  fileName  ) 

Definition at line 444 of file nib.c.

References endsWith(), and findNibSubrange().

Referenced by dnaLoadSingle(), gfClientFileArray(), gfClientSeqList(), gfFileCacheFreeEl(), gfiExpandAndLoadCached(), gfiGetSeqName(), gfIndexNibsAndTwoBits(), gfIndexTransNibsAndTwoBits(), gfMakeOoc(), readSeqFrag(), and searchOneIndex().

00446 {
00447 boolean isANib;
00448 char *subrange = findNibSubrange(fileName);
00449 if (subrange != NULL)
00450     *subrange = '\0';
00451 isANib = endsWith(fileName, ".nib") || endsWith(fileName, ".NIB");
00452 if (subrange != NULL)
00453     *subrange = ':';
00454 return isANib;
00455 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean nibIsRange ( char *  fileName  ) 

Definition at line 457 of file nib.c.

References endsWith(), FALSE, and findNibSubrange().

00459 {
00460 boolean isANib;
00461 char *subrange = findNibSubrange(fileName);;
00462 if (subrange == NULL)
00463     return FALSE;
00464 *subrange = '\0';
00465 isANib = endsWith(fileName, ".nib") || endsWith(fileName, ".NIB");
00466 *subrange = ':';
00467 return isANib;
00468 }

Here is the call graph for this function:

struct dnaSeq* nibLdPart ( char *  fileName,
FILE *  f,
int  seqSize,
int  start,
int  size 
) [read]

Definition at line 301 of file nib.c.

References nibLdPartMasked().

Referenced by gfAddTilesInNib(), gfCountTilesInNib(), gfiExpandAndLoadCached(), and nibTwoCacheSeq().

00303 {
00304 return nibLdPartMasked(0, fileName, f, seqSize, start, size);
00305 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* nibLdPartMasked ( int  options,
char *  fileName,
FILE *  f,
int  seqSize,
int  start,
int  size 
) [read]

Definition at line 292 of file nib.c.

References nibInput(), and safef().

Referenced by nibLdPart(), nibLoadPartMasked(), and nibTwoCacheSeqPartExt().

00295 {
00296 char nameBuf[512];
00297 safef(nameBuf, sizeof(nameBuf), "%s:%d-%d", fileName, start, start+size);
00298 return nibInput(options, fileName, nameBuf, f, seqSize, start, size);
00299 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* nibLoadAll ( char *  fileName  )  [read]

Definition at line 352 of file nib.c.

References nibLoadAllMasked().

Referenced by gfMakeOoc(), and readMaskedNib().

00354 {
00355 return nibLoadAllMasked(0, fileName);
00356 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* nibLoadAllMasked ( int  options,
char *  fileName 
) [read]

Definition at line 325 of file nib.c.

References name, nibInput(), nibOpenVerify(), nibParseName(), and PATH_LEN.

Referenced by dnaLoadSingle(), gfClientSeqList(), nibLoadAll(), nibTwoLoadOne(), readMaskedNib(), and searchOneIndex().

00328  * A subrange of the nib file may be specified in fileName using the syntax:
00329  *    /path/file.nib:seqid:start-end
00330  * or
00331  *    /path/file.nib:start-end
00332  * With the first form, seqid becomes the id of the subrange, with the second
00333  * form, a sequence id of file:start-end will be used.
00334  */
00335 {
00336 struct dnaSeq *seq;
00337 FILE *f;
00338 int seqSize;
00339 char filePath[PATH_LEN];
00340 char name[PATH_LEN];
00341 unsigned start, end;
00342 
00343 nibParseName(options, fileName, filePath, name, &start, &end);
00344 nibOpenVerify(filePath, &f, &seqSize);
00345 if (end == 0)
00346     end = seqSize;
00347 seq = nibInput(options, fileName, name, f, seqSize, start, end-start);
00348 fclose(f);
00349 return seq;
00350 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* nibLoadPart ( char *  fileName,
int  start,
int  size 
) [read]

Definition at line 319 of file nib.c.

References nibLoadPartMasked().

Referenced by readSeqFrag().

00321 {
00322 return nibLoadPartMasked(0, fileName, start, size);
00323 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* nibLoadPartMasked ( int  options,
char *  fileName,
int  start,
int  size 
) [read]

Definition at line 307 of file nib.c.

References nibLdPartMasked(), and nibOpenVerify().

Referenced by nibLoadPart().

00309 {
00310 struct dnaSeq *seq;
00311 FILE *f;
00312 int seqSize;
00313 nibOpenVerify(fileName, &f, &seqSize);
00314 seq = nibLdPartMasked(options, fileName, f, seqSize, start, size);
00315 fclose(f);
00316 return seq;
00317 }

Here is the call graph for this function:

Here is the caller graph for this function:

void nibOpenVerify ( char *  fileName,
FILE **  retFile,
int *  retSize 
)

Definition at line 84 of file nib.c.

References bits32, byteSwap32(), dnaUtilOpen(), errAbort(), errno, mustReadOne, nibSig, and safef().

Referenced by dnaLoadSingle(), gfAddTilesInNib(), gfCountTilesInNib(), nibGetSize(), nibInfoNew(), nibLoadAllMasked(), and nibLoadPartMasked().

00086 {
00087 bits32 size;
00088 bits32 sig;
00089 FILE *f = fopen(fileName, "rb");
00090 char buffer[512];
00091 char buffer2[512];
00092 char buffer3[512];
00093 
00094 if (f == NULL)
00095     {
00096     /* see if nib is down a few directories ala faSplit -outDirDepth */
00097     char *ptr = NULL;
00098     char *dir, *file;
00099     struct stat statBuf;
00100 
00101     /* divide fileName into file and directory components */
00102     safef(buffer, sizeof(buffer), "%s", fileName);
00103     if ((ptr = strrchr(buffer, '/')) != NULL)
00104         {
00105         *ptr++ = 0;
00106         dir = buffer;
00107         file = ptr;
00108         }
00109     else
00110         {
00111         dir = "";
00112         file = buffer;
00113         }
00114     
00115     buffer3[0] = 0;
00116     /* start at the end of the fileName (minus .nib) */
00117     for(ptr = &file[strlen(file) - 5]; ; )
00118         {
00119         strcpy(buffer2, buffer3);
00120         if (isdigit(*ptr))
00121             {
00122             /* if we have a digit in the fileName, see if there is a directory with this name */
00123             safef(buffer3, sizeof(buffer3), "%c/%s",*ptr,buffer2);
00124             ptr--;
00125             }
00126         else
00127             /* we've run out of digits in the fileName, just add 0's */
00128             safef(buffer3, sizeof(buffer3), "0/%s",buffer2);
00129 
00130         /* check to see if this directory exists */
00131         safef(buffer2, sizeof(buffer2), "%s/%s", dir, buffer3);
00132         if (stat(buffer2, &statBuf) < 0)
00133             break;
00134 
00135         /* directory exists, see if our file is down there */
00136         safef(buffer2, sizeof(buffer2), "%s/%s/%s", dir, buffer3, file);
00137         if  ((f = fopen(buffer2, "rb")) != NULL)
00138             break;
00139         }
00140     if (f == NULL)
00141         errAbort("Can't open %s to read: %s", fileName,  strerror(errno));
00142     }
00143 dnaUtilOpen();
00144 mustReadOne(f, sig);
00145 mustReadOne(f, size);
00146 if (sig != nibSig)
00147     {
00148     sig = byteSwap32(sig);
00149     size = byteSwap32(size);
00150     if (sig != nibSig)
00151         errAbort("%s is not a good .nib file.",  fileName);
00152     }
00153 *retSize = size;
00154 *retFile = f;
00155 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void nibOutput ( int  options,
struct dnaSeq *  seq,
char *  fileName 
) [static]

Definition at line 238 of file nib.c.

References bitReadOne(), bits32, carefulClose(), dnaSeq::dna, errAbort(), dnaSeq::mask, mask, MASKED_BASE_BIT, mustOpen(), NIB_MASK_MAP, NIB_MASK_MIXED, nibSig, ntVal5, ntValMasked, dnaSeq::size, UBYTE, and writeOne.

Referenced by nibWriteMasked().

00241 {
00242 UBYTE byte;
00243 DNA *dna = seq->dna;
00244 int dVal1, dVal2;
00245 bits32 size = seq->size;
00246 int byteCount = (size>>1);
00247 bits32 sig = nibSig;
00248 int *ntValTbl = ((options & NIB_MASK_MIXED) ? ntValMasked : ntVal5);
00249 Bits* mask = ((options & NIB_MASK_MAP) ? seq->mask : NULL);
00250 int maskIdx = 0;
00251 FILE *f = mustOpen(fileName, "w");
00252 
00253 assert(sizeof(bits32) == 4);
00254 
00255 writeOne(f, sig);
00256 writeOne(f, seq->size);
00257 
00258 printf("Writing %d bases in %d bytes\n", seq->size, ((seq->size+1)/2) + 8);
00259 while (--byteCount >= 0)
00260     {
00261     dVal1 = ntValTbl[(int)dna[0]];
00262     dVal2 = ntValTbl[(int)dna[1]];
00263     /* Set from mask, remember bit in character is opposite sense of bit
00264      * in mask. */
00265     if (mask != NULL)
00266         {
00267         if (!bitReadOne(mask, maskIdx))
00268             dVal1 |= MASKED_BASE_BIT;
00269         if (!bitReadOne(mask, maskIdx+1))
00270             dVal2 |= MASKED_BASE_BIT;
00271         maskIdx += 2;
00272         }
00273     byte = (dVal1<<4) | dVal2;
00274     if (putc(byte, f) < 0)
00275         {
00276         perror("");
00277         errAbort("Couldn't write all of %s", fileName);
00278         }
00279     dna += 2;
00280     }
00281 if (size & 1)
00282     {
00283     dVal1 = ntValTbl[(int)dna[0]];
00284     if ((mask != NULL) && !bitReadOne(mask, maskIdx))
00285         dVal1 |= MASKED_BASE_BIT;
00286     byte = (dVal1<<4);
00287     putc(byte, f);
00288     }
00289 carefulClose(&f);
00290 }

Here is the call graph for this function:

Here is the caller graph for this function:

void nibParseName ( unsigned  options,
char *  fileSpec,
char *  filePath,
char *  name,
unsigned *  start,
unsigned *  end 
)

Definition at line 48 of file nib.c.

References findNibSubrange(), NIB_BASE_NAME, parseSubrange(), and splitPath().

Referenced by dnaLoadSingle(), and nibLoadAllMasked().

00054 {
00055 char *subrange = findNibSubrange(fileSpec);
00056 if (subrange != NULL)
00057     {
00058     *subrange = '\0';
00059     parseSubrange(subrange, name, start, end);
00060     strcpy(filePath, fileSpec);
00061     *subrange = ':';
00062     if (strlen(name) == 0)
00063         {
00064         /* no name in spec, generate one */
00065         if (options & NIB_BASE_NAME)
00066             splitPath(filePath, NULL, name, NULL);
00067         else
00068             strcpy(name, filePath);
00069         sprintf(name+strlen(name), ":%u-%u", *start, *end);
00070         }
00071     }
00072 else
00073     {
00074     *start = 0;
00075     *end = 0;
00076     strcpy(filePath, fileSpec);
00077     if (options & NIB_BASE_NAME)
00078         splitPath(fileSpec, NULL, name, NULL);
00079     else
00080         strcpy(name, fileSpec);
00081     }
00082 }

Here is the call graph for this function:

Here is the caller graph for this function:

void nibStreamClose ( struct nibStream **  pNs  ) 

Definition at line 401 of file nib.c.

References bits32, nibStream::byte, nibStream::f, nibStream::fileName, freeMem(), freez(), nibSig, SEEK_SET, nibStream::size, and writeOne.

00403 {
00404 struct nibStream *ns = *pNs;
00405 FILE *f;
00406 bits32 sig = nibSig;
00407 if (ns == NULL)
00408     return;
00409 f = ns->f;
00410 if (ns->size&1)
00411     writeOne(f, ns->byte);
00412 fseek(f,  0L, SEEK_SET);
00413 writeOne(f, sig);
00414 writeOne(f, ns->size);
00415 fclose(f);
00416 freeMem(ns->fileName);
00417 freez(pNs);
00418 }

Here is the call graph for this function:

void nibStreamMany ( struct nibStream *  ns,
DNA *  dna,
int  size 
)

Definition at line 436 of file nib.c.

References nibStreamOne().

00438 {
00439 int i;
00440 for (i=0; i<size; ++i)
00441     nibStreamOne(ns, *dna++);
00442 }

Here is the call graph for this function:

void nibStreamOne ( struct nibStream *  ns,
DNA  base 
)

Definition at line 420 of file nib.c.

References nibStream::byte, nibStream::f, ntVal5, nibStream::size, UBYTE, and writeOne.

Referenced by nibStreamMany().

00422 {
00423 UBYTE ub = ntVal5[(int)base];
00424 
00425 if ((++ns->size&1) == 0)
00426     {
00427     ub += ns->byte;
00428     writeOne(ns->f, ub);
00429     }
00430 else
00431     {
00432     ns->byte = (ub<<4);
00433     }
00434 }

Here is the caller graph for this function:

struct nibStream* nibStreamOpen ( char *  fileName  )  [read]

Definition at line 383 of file nib.c.

References AllocVar, cloneString(), dnaUtilOpen(), nibStream::f, nibStream::fileName, mustOpen(), nibStream::size, and writeOne.

00385 {
00386 struct nibStream *ns;
00387 FILE *f;
00388 
00389 dnaUtilOpen();
00390 AllocVar(ns);
00391 ns->f = f = mustOpen(fileName, "wb");
00392 ns->fileName = cloneString(fileName);
00393 
00394 /* Write header - initially zero.  Will fix it up when we close. */
00395 writeOne(f, ns->size);
00396 writeOne(f, ns->size);
00397 
00398 return ns;
00399 }

Here is the call graph for this function:

void nibWrite ( struct dnaSeq *  seq,
char *  fileName 
)

Definition at line 365 of file nib.c.

References nibWriteMasked().

00367 {
00368     nibWriteMasked(0, seq, fileName);
00369 }

Here is the call graph for this function:

void nibWriteMasked ( int  options,
struct dnaSeq *  seq,
char *  fileName 
)

Definition at line 358 of file nib.c.

References nibOutput().

Referenced by nibWrite().

00361 {
00362     nibOutput(options, seq, fileName);
00363 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void parseSubrange ( char *  subrange,
char *  name,
unsigned *  start,
unsigned *  end 
) [static]

Definition at line 24 of file nib.c.

References errAbort().

Referenced by nibParseName().

00027 {
00028 char *rangePart = strchr(subrange+1, ':');
00029 if (rangePart != NULL)
00030     {
00031     /* :seqId:start-end form */
00032     *rangePart = '\0';
00033     strcpy(name, subrange+1);
00034     *rangePart = ':';
00035     rangePart++;
00036     }
00037 else
00038     {
00039     /* :start-end form */
00040     rangePart = subrange+1;
00041     strcpy(name, ""); 
00042     }
00043 if ((sscanf(rangePart, "%u-%u", start, end) != 2) || (*start > *end))
00044     errAbort("can't parse nib file subsequence specification: %s",
00045              subrange);
00046 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

char const rcsid[] = "$Id: nib.c,v 1.24 2007/03/13 20:43:05 kent Exp $" [static]

Definition at line 13 of file nib.c.


Generated on Tue Dec 25 20:05:26 2007 for blat by  doxygen 1.5.2