inc/fa.h File Reference

#include "dnaseq.h"
#include "linefile.h"

Include dependency graph for fa.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

struct dnaSeq * faReadDna (char *fileName)
aaSeq * faReadAa (char *fileName)
bioSeq * faReadSeq (char *fileName, boolean isDna)
struct dnaSeq * faReadAllDna (char *fileName)
struct dnaSeq * faReadAllPep (char *fileName)
struct dnaSeq * faReadAllSeq (char *fileName, boolean isDna)
struct dnaSeq * faReadAllMixed (char *fileName)
struct hash * faReadAllIntoHash (char *fileName, enum dnaCase dnaCase)
struct dnaSeq * faReadAllMixedInLf (struct lineFile *lf)
struct dnaSeq * faReadOneDnaSeq (FILE *f, char *name, boolean mustStartWithSign)
boolean faReadNext (FILE *f, char *defaultName, boolean mustStartWithComment, char **retCommentLine, struct dnaSeq **retSeq)
boolean faReadMixedNext (FILE *f, boolean preserveCase, char *defaultName, boolean mustStartWithComment, char **retCommentLine, struct dnaSeq **retSeq)
struct dnaSeq * faFromMemText (char *text)
bioSeq * faSeqFromMemText (char *text, boolean isDna)
bioSeq * faNextSeqFromMemText (char **pText, boolean isDna)
bioSeq * faNextSeqFromMemTextRaw (char **pText)
bioSeq * faSeqListFromMemText (char *text, boolean isDna)
bioSeq * faSeqListFromMemTextRaw (char *text)
boolean faFastReadNext (FILE *f, DNA **retDna, int *retSize, char **retName)
boolean faSpeedReadNext (struct lineFile *lf, DNA **retDna, int *retSize, char **retName)
boolean faPepSpeedReadNext (struct lineFile *lf, DNA **retDna, int *retSize, char **retName)
boolean faSomeSpeedReadNext (struct lineFile *lf, DNA **retDna, int *retSize, char **retName, boolean isDna)
boolean faMixedSpeedReadNext (struct lineFile *lf, DNA **retDna, int *retSize, char **retName)
void faToProtein (char *poly, int size)
void faToDna (char *poly, int size)
void faFreeFastBuf ()
void faWrite (char *fileName, char *startLine, DNA *dna, int dnaSize)
void faWriteNext (FILE *f, char *startLine, DNA *dna, int dnaSize)
void faWriteAll (char *fileName, bioSeq *seqList)


Function Documentation

boolean faFastReadNext ( FILE *  f,
DNA **  retDna,
int *  retSize,
char **  retName 
)

Definition at line 336 of file fa.c.

References ArraySize, dnaUtilOpen(), expandFaFastBuf(), faFastBuf, faFastBufSize, FALSE, name, ntChars, and TRUE.

00340 {
00341 int c;
00342 int bufIx = 0;
00343 static char name[256];
00344 int nameIx = 0;
00345 boolean gotSpace = FALSE;
00346 
00347 /* Seek to next '\n' and save first word as name. */
00348 dnaUtilOpen();
00349 name[0] = 0;
00350 for (;;)
00351     {
00352     if ((c = fgetc(f)) == EOF)
00353         {
00354         *retDna = NULL;
00355         *retSize = 0;
00356         *retName = NULL;
00357         return FALSE;
00358         }
00359     if (!gotSpace && nameIx < ArraySize(name)-1)
00360         {
00361         if (isspace(c))
00362             gotSpace = TRUE;
00363         else if (c != '>')
00364             {
00365             name[nameIx++] = c;
00366             }
00367         }
00368     if (c == '\n')
00369         break;
00370     }
00371 name[nameIx] = 0;
00372 /* Read until next '>' */
00373 for (;;)
00374     {
00375     c = fgetc(f);
00376     if (c == EOF || c == '>')
00377         c = 0;
00378     else if (!isalpha(c))
00379         continue;
00380     else
00381         {
00382         c = ntChars[c];
00383         if (c == 0) c = 'n';
00384         }
00385     if (bufIx >= faFastBufSize)
00386         expandFaFastBuf(bufIx, 0);
00387     faFastBuf[bufIx++] = c;
00388     if (c == 0)
00389         {
00390         *retDna = faFastBuf;
00391         *retSize = bufIx-1;
00392         *retName = name;
00393         return TRUE;
00394         }
00395     }
00396 }

Here is the call graph for this function:

void faFreeFastBuf (  ) 

Definition at line 329 of file fa.c.

References faFastBuf, faFastBufSize, and freez().

Referenced by faReadAllMixableInLf().

00331 {
00332 freez(&faFastBuf);
00333 faFastBufSize = 0;
00334 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* faFromMemText ( char *  text  )  [read]

Definition at line 257 of file fa.c.

References faNextSeqFromMemText(), and TRUE.

00263 {
00264 return faNextSeqFromMemText(&text, TRUE);
00265 }

Here is the call graph for this function:

boolean faMixedSpeedReadNext ( struct lineFile *  lf,
DNA **  retDna,
int *  retSize,
char **  retName 
)

Definition at line 430 of file fa.c.

References dnaUtilOpen(), errAbort(), expandFaFastBuf(), faFastBuf, faFastBufSize, FALSE, lineFile::fileName, firstWordInLine(), lineFileNext(), lineFileReuse(), lineFile::lineIx, name, skipLeadingSpaces(), TRUE, and warn().

Referenced by bigBlat(), dnaLoadNextFromStack(), faReadAllMixableInLf(), faSomeSpeedReadNext(), and searchOneIndex().

00433 {
00434 char c;
00435 int bufIx = 0;
00436 static char name[512];
00437 int lineSize, i;
00438 char *line;
00439 
00440 dnaUtilOpen();
00441 
00442 /* Read first line, make sure it starts with '>', and read first word
00443  * as name of sequence. */
00444 name[0] = 0;
00445 if (!lineFileNext(lf, &line, &lineSize))
00446     {
00447     *retDna = NULL;
00448     *retSize = 0;
00449     return FALSE;
00450     }
00451 if (line[0] == '>')
00452     {
00453     line = firstWordInLine(skipLeadingSpaces(line+1));
00454     if (line == NULL)
00455         errAbort("Expecting sequence name after '>' line %d of %s", lf->lineIx, lf->fileName);
00456     strncpy(name, line, sizeof(name));
00457     name[sizeof(name)-1] = '\0'; /* Just to make sure name is NULL terminated. */
00458     }
00459 else
00460     {
00461     errAbort("Expecting '>' line %d of %s", lf->lineIx, lf->fileName);
00462     }
00463 /* Read until next '>' */
00464 for (;;)
00465     {
00466     if (!lineFileNext(lf, &line, &lineSize))
00467         break;
00468     if (line[0] == '>')
00469         {
00470         lineFileReuse(lf);
00471         break;
00472         }
00473     if (bufIx + lineSize >= faFastBufSize)
00474         expandFaFastBuf(bufIx, lineSize);
00475     for (i=0; i<lineSize; ++i)
00476         {
00477         c = line[i];
00478         if (isalpha(c) || c == '-')
00479             faFastBuf[bufIx++] = c;
00480         }
00481     }
00482 if (bufIx >= faFastBufSize)
00483     expandFaFastBuf(bufIx, 0);
00484 faFastBuf[bufIx] = 0;
00485 *retDna = faFastBuf;
00486 *retSize = bufIx;
00487 *retName = name;
00488 if (bufIx == 0)
00489     {
00490     warn("Invalid fasta format: sequence size == 0 for element %s",name);
00491     }
00492 
00493 return TRUE;
00494 }

Here is the call graph for this function:

Here is the caller graph for this function:

bioSeq* faNextSeqFromMemText ( char **  pText,
boolean  isDna 
)

Definition at line 211 of file fa.c.

References nextSeqFromMem(), and TRUE.

Referenced by faFromMemText(), faSeqFromMemText(), and faSeqListFromMemText().

00214 {
00215 return nextSeqFromMem(pText, isDna, TRUE);
00216 }

Here is the call graph for this function:

Here is the caller graph for this function:

bioSeq* faNextSeqFromMemTextRaw ( char **  pText  ) 

Definition at line 218 of file fa.c.

References FALSE, nextSeqFromMem(), and TRUE.

Referenced by faSeqListFromMemTextRaw().

00221 {
00222 return nextSeqFromMem(pText, TRUE, FALSE);
00223 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean faPepSpeedReadNext ( struct lineFile *  lf,
DNA **  retDna,
int *  retSize,
char **  retName 
)

Definition at line 545 of file fa.c.

References FALSE, and faSomeSpeedReadNext().

00547 {
00548 return faSomeSpeedReadNext(lf, retDna, retSize, retName, FALSE);
00549 }

Here is the call graph for this function:

aaSeq* faReadAa ( char *  fileName  )  [read]

Definition at line 290 of file fa.c.

References FALSE, and faReadSeq().

00292 {
00293 return faReadSeq(fileName, FALSE);
00294 }

Here is the call graph for this function:

struct dnaSeq* faReadAllDna ( char *  fileName  )  [read]

Definition at line 626 of file fa.c.

References faReadAllSeq(), and TRUE.

00628 {
00629 return faReadAllSeq(fileName, TRUE);
00630 }

Here is the call graph for this function:

struct hash* faReadAllIntoHash ( char *  fileName,
enum dnaCase  dnaCase 
) [read]

Definition at line 602 of file fa.c.

References dnaLower, dnaMixed, errAbort(), faReadAllSeqMixable(), hashAdd(), hashLookup(), hashNew, isDna(), dnaSeq::name, and dnaSeq::next.

00604 {
00605 boolean isDna = (dnaCase == dnaLower);
00606 boolean isMixed = (dnaCase == dnaMixed);
00607 struct dnaSeq *seqList = faReadAllSeqMixable(fileName, isDna, isMixed);
00608 struct hash *hash = hashNew(18);
00609 struct dnaSeq *seq;
00610 for (seq = seqList; seq != NULL; seq = seq->next)
00611     {
00612     if (hashLookup(hash, seq->name))
00613         errAbort("%s duplicated in %s", seq->name, fileName);
00614     hashAdd(hash, seq->name, seq);
00615     }
00616 return hash;
00617 }

Here is the call graph for this function:

struct dnaSeq* faReadAllMixed ( char *  fileName  )  [read]

Definition at line 638 of file fa.c.

References FALSE, faReadAllSeqMixable(), and TRUE.

Referenced by gfClientSeqList().

00640 {
00641 return faReadAllSeqMixable(fileName, FALSE, TRUE);
00642 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* faReadAllMixedInLf ( struct lineFile *  lf  )  [read]

Definition at line 644 of file fa.c.

References FALSE, faReadAllMixableInLf(), and TRUE.

00646 {
00647 return faReadAllMixableInLf(lf, FALSE, TRUE);
00648 }

Here is the call graph for this function:

struct dnaSeq* faReadAllPep ( char *  fileName  )  [read]

Definition at line 632 of file fa.c.

References FALSE, and faReadAllSeq().

Referenced by gfClientSeqList().

00634 {
00635 return faReadAllSeq(fileName, FALSE);
00636 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* faReadAllSeq ( char *  fileName,
boolean  isDna 
) [read]

Definition at line 620 of file fa.c.

References FALSE, and faReadAllSeqMixable().

Referenced by faReadAllDna(), faReadAllPep(), and gfMakeOoc().

00622 {
00623 return faReadAllSeqMixable(fileName, isDna, FALSE);
00624 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* faReadDna ( char *  fileName  )  [read]

Definition at line 284 of file fa.c.

References faReadSeq(), and TRUE.

00286 {
00287 return faReadSeq(fileName, TRUE);
00288 }

Here is the call graph for this function:

boolean faReadMixedNext ( FILE *  f,
boolean  preserveCase,
char *  defaultName,
boolean  mustStartWithComment,
char **  retCommentLine,
struct dnaSeq **  retSeq 
)

Definition at line 29 of file fa.c.

References ArraySize, chopByWhite(), cloneString(), dnaUtilOpen(), errnoAbort(), FALSE, name, needHugeMem(), newDnaSeq(), ntChars, SEEK_SET, TRUE, and warn().

Referenced by faReadNext().

00035 {
00036 char lineBuf[1024];
00037 int lineSize;
00038 char *words[1];
00039 int c;
00040 off_t offset = ftello(f);
00041 size_t dnaSize = 0;
00042 DNA *dna, *sequence;
00043 char *name = defaultName;
00044 
00045 if (name == NULL)
00046     name = "";
00047 dnaUtilOpen();
00048 if (retCommentLine != NULL)
00049     *retCommentLine = NULL;
00050 *retSeq = NULL;
00051 
00052 /* Skip first lines until it starts with '>' */
00053 for (;;)
00054     {
00055     if(fgets(lineBuf, sizeof(lineBuf), f) == NULL)
00056         {
00057         if (ferror(f))
00058             errnoAbort("read of fasta file failed");
00059         return FALSE;
00060         }
00061     lineSize = strlen(lineBuf);
00062     if (lineBuf[0] == '>')
00063         {
00064         if (retCommentLine != NULL)
00065             *retCommentLine = cloneString(lineBuf);
00066         offset = ftello(f);
00067         chopByWhite(lineBuf, words, ArraySize(words));
00068         name = words[0]+1;
00069         break;
00070         }
00071     else if (!mustStartWithComment)
00072         {
00073         if (fseeko(f, offset, SEEK_SET) < 0)
00074             errnoAbort("fseek on fasta file failed");
00075         break;
00076         }
00077     else
00078         offset += lineSize;
00079     }
00080 /* Count up DNA. */
00081 for (;;)
00082     {
00083     c = fgetc(f);
00084     if (c == EOF || c == '>')
00085         break;
00086     if (isalpha(c))
00087         {
00088         ++dnaSize;
00089         }
00090     }
00091 
00092 if (dnaSize == 0)
00093     {
00094     warn("Invalid fasta format: sequence size == 0 for element %s",name);
00095     }
00096 
00097 /* Allocate DNA and fill it up from file. */
00098 dna = sequence = needHugeMem(dnaSize+1);
00099 if (fseeko(f, offset, SEEK_SET) < 0)
00100     errnoAbort("fseek on fasta file failed");
00101 for (;;)
00102     {
00103     c = fgetc(f);
00104     if (c == EOF || c == '>')
00105         break;
00106     if (isalpha(c))
00107         {
00108         /* check for non-DNA char */
00109         if (ntChars[c] == 0)
00110             {
00111             *dna++ = preserveCase ? 'N' : 'n';
00112             }
00113         else
00114             {
00115             *dna++ = preserveCase ? c : ntChars[c];
00116             }
00117         }
00118     }
00119 if (c == '>')
00120     ungetc(c, f);
00121 *dna = 0;
00122 
00123 *retSeq = newDnaSeq(sequence, dnaSize, name);
00124 if (ferror(f))
00125     errnoAbort("read of fasta file failed");    
00126 return TRUE;
00127 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean faReadNext ( FILE *  f,
char *  defaultName,
boolean  mustStartWithComment,
char **  retCommentLine,
struct dnaSeq **  retSeq 
)

Definition at line 17 of file fa.c.

References faReadMixedNext().

Referenced by faReadOneDnaSeq(), flyCdnaSeq(), nextWormCdnaAndInfo(), and wormCdnaSeq().

00024 {
00025     return faReadMixedNext(f, 0, defaultName, mustStartWithComment,
00026                                         retCommentLine, retSeq);
00027 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* faReadOneDnaSeq ( FILE *  f,
char *  name,
boolean  mustStartWithSign 
) [read]

Definition at line 130 of file fa.c.

References faReadNext().

Referenced by nextWormCdna().

00133 {
00134 struct dnaSeq *seq;
00135 if (!faReadNext(f, defaultName, mustStartWithComment, NULL, &seq))
00136     return NULL;
00137 else
00138     return seq;
00139 }

Here is the call graph for this function:

Here is the caller graph for this function:

bioSeq* faReadSeq ( char *  fileName,
boolean  isDna 
) [read]

Definition at line 267 of file fa.c.

References errAbort(), faSeqFromMemText(), fileSize(), and needHugeMem().

Referenced by faReadAa(), faReadDna(), and queryServer().

00269 {
00270 int maxSize = fileSize(fileName);
00271 int fd;
00272 DNA *s;
00273 
00274 if (maxSize < 0)
00275     errAbort("can't open %s", fileName);
00276 s = needHugeMem(maxSize+1);
00277 fd = open(fileName, O_RDONLY);
00278 read(fd, s, maxSize);
00279 close(fd);
00280 s[maxSize] = 0;
00281 return faSeqFromMemText(s, isDna);
00282 }

Here is the call graph for this function:

Here is the caller graph for this function:

bioSeq* faSeqFromMemText ( char *  text,
boolean  isDna 
)

Definition at line 251 of file fa.c.

References faNextSeqFromMemText().

Referenced by faReadSeq().

00253 {
00254 return faNextSeqFromMemText(&text, isDna);
00255 }

Here is the call graph for this function:

Here is the caller graph for this function:

bioSeq* faSeqListFromMemText ( char *  text,
boolean  isDna 
)

Definition at line 225 of file fa.c.

References faNextSeqFromMemText(), slAddHead, and slReverse().

Referenced by doBlat().

00227 {
00228 bioSeq *seqList = NULL, *seq;
00229 while ((seq = faNextSeqFromMemText(&text, isDna)) != NULL)
00230     {
00231     slAddHead(&seqList, seq);
00232     }
00233 slReverse(&seqList);
00234 return seqList;
00235 }

Here is the call graph for this function:

Here is the caller graph for this function:

bioSeq* faSeqListFromMemTextRaw ( char *  text  ) 

Definition at line 237 of file fa.c.

References faNextSeqFromMemTextRaw(), slAddHead, and slReverse().

00240 {
00241 bioSeq *seqList = NULL, *seq;
00242 while ((seq = faNextSeqFromMemTextRaw(&text)) != NULL)
00243     {
00244     slAddHead(&seqList, seq);
00245     }
00246 slReverse(&seqList);
00247 return seqList;
00248 }

Here is the call graph for this function:

boolean faSomeSpeedReadNext ( struct lineFile *  lf,
DNA **  retDna,
int *  retSize,
char **  retName,
boolean  isDna 
)

Definition at line 528 of file fa.c.

References FALSE, faMixedSpeedReadNext(), faToDna(), faToProtein(), dnaSeq::size, and TRUE.

Referenced by faPepSpeedReadNext(), faReadAllMixableInLf(), faReadNamedSeq(), faSpeedReadNext(), and gfClient().

00530 {
00531 char *poly;
00532 int size;
00533 
00534 if (!faMixedSpeedReadNext(lf, retDna, retSize, retName))
00535     return FALSE;
00536 size = *retSize;
00537 poly = *retDna;
00538 if (isDna)
00539     faToDna(poly, size);
00540 else
00541     faToProtein(poly, size);
00542 return TRUE;
00543 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean faSpeedReadNext ( struct lineFile *  lf,
DNA **  retDna,
int *  retSize,
char **  retName 
)

Definition at line 551 of file fa.c.

References faSomeSpeedReadNext(), and TRUE.

Referenced by genoFindDirect().

00556 {
00557 return faSomeSpeedReadNext(lf, retDna, retSize, retName, TRUE);
00558 }

Here is the call graph for this function:

Here is the caller graph for this function:

void faToDna ( char *  poly,
int  size 
)

Definition at line 512 of file fa.c.

References dnaUtilOpen(), and ntChars.

Referenced by faSomeSpeedReadNext(), gfClientSeqList(), gfClientUnmask(), and maskQuerySeq().

00516 {
00517 int i;
00518 char c;
00519 dnaUtilOpen();
00520 for (i=0; i<size; ++i)
00521     {
00522     if ((c = ntChars[(int)poly[i]]) == 0)
00523         c = 'n';
00524     poly[i] = c;
00525     }
00526 }

Here is the call graph for this function:

Here is the caller graph for this function:

void faToProtein ( char *  poly,
int  size 
)

Definition at line 496 of file fa.c.

References aaChars, and dnaUtilOpen().

Referenced by faSomeSpeedReadNext(), gfClientSeqList(), and maskQuerySeq().

00500 {
00501 int i;
00502 char c;
00503 dnaUtilOpen();
00504 for (i=0; i<size; ++i)
00505     {
00506     if ((c = aaChars[(int)poly[i]]) == 0)
00507         c = 'X';
00508     poly[i] = c;
00509     }
00510 }

Here is the call graph for this function:

Here is the caller graph for this function:

void faWrite ( char *  fileName,
char *  startLine,
DNA *  dna,
int  dnaSize 
)

Definition at line 409 of file fa.c.

References errnoAbort(), faWriteNext(), and mustOpen().

00411 {
00412 FILE *f = mustOpen(fileName, "w");
00413 faWriteNext(f, startLine, dna, dnaSize);
00414 if (fclose(f) != 0)
00415     errnoAbort("fclose failed");
00416 }

Here is the call graph for this function:

void faWriteAll ( char *  fileName,
bioSeq *  seqList 
)

Definition at line 418 of file fa.c.

References dnaSeq::dna, errnoAbort(), faWriteNext(), mustOpen(), dnaSeq::name, dnaSeq::next, and dnaSeq::size.

Referenced by doBlat().

00420 {
00421 FILE *f = mustOpen(fileName, "w");
00422 bioSeq *seq;
00423 
00424 for (seq=seqList; seq != NULL; seq = seq->next)
00425     faWriteNext(f, seq->name, seq->dna, seq->size);
00426 if (fclose(f) != 0)
00427     errnoAbort("fclose failed");
00428 }

Here is the call graph for this function:

Here is the caller graph for this function:

void faWriteNext ( FILE *  f,
char *  startLine,
DNA *  dna,
int  dnaSize 
)

Definition at line 399 of file fa.c.

References writeSeqWithBreaks().

Referenced by bandExt(), faWrite(), faWriteAll(), outputFa(), and startServer().

00401 {
00402 if (dnaSize == 0)
00403     return;
00404 if (startLine != NULL)
00405     fprintf(f, ">%s\n", startLine);
00406 writeSeqWithBreaks(f, dna, dnaSize, 50);
00407 }

Here is the call graph for this function:

Here is the caller graph for this function:


Generated on Tue Dec 25 18:55:11 2007 for blat by  doxygen 1.5.2