inc/oldGff.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  gff
struct  gffSegment
struct  gffGene

Typedefs

typedef struct gffSegment GffIntron
typedef struct gffSegment GffExon

Functions

boolean gffOpen (struct gff *gff, char *fileName)
boolean gffOpenAndRead (struct gff *gff, char *fileName)
void gffClose (struct gff *gff)
boolean gffReadDna (struct gff *gff)
struct gffGene * gffFindGene (struct gff *gff, char *geneName)
struct gffGene * gffFindGeneIgnoreCase (struct gff *gff, char *geneName)
void gffPrintInfo (struct gff *gff, FILE *out)
boolean gffReadGenes (struct gff *gff)
struct gffGene * gffDupeGene (struct gff *gff, struct gffGene *oldGene)
struct gffGene * gffDupeGeneAndSurrounds (struct gff *gff, struct gffGene *oldGene, int leftExtra, int rightExtra)
struct gffGene * gffGeneWithOwnDna (struct gff *gff, char *geneName)
void gffFreeGene (struct gffGene **pGene)
struct dnaSeq * gffReadDnaSeq (char *fileName)


Typedef Documentation

typedef struct gffSegment GffExon

Definition at line 40 of file oldGff.h.

typedef struct gffSegment GffIntron

Definition at line 39 of file oldGff.h.


Function Documentation

void gffClose ( struct gff *  gff  ) 

Definition at line 74 of file oldGff.c.

References gff::dna, gff::file, freeMem(), lmCleanup(), gff::memPool, and zeroBytes().

Referenced by gffOpenAndRead(), and gffReadDnaSeq().

00076 {
00077 if (gff->file != NULL)
00078     fclose(gff->file);
00079 freeMem(gff->dna);
00080 lmCleanup(&gff->memPool);
00081 zeroBytes(gff, sizeof(*gff));
00082 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gffGene* gffDupeGene ( struct gff *  gff,
struct gffGene *  oldGene 
) [read]

Definition at line 589 of file oldGff.c.

References gffDupeGeneAndSurrounds().

Referenced by gffGeneWithOwnDna().

00591 {
00592 return gffDupeGeneAndSurrounds(gff, oldGene, 0, 0);
00593 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gffGene* gffDupeGeneAndSurrounds ( struct gff *  gff,
struct gffGene *  oldGene,
int  leftExtra,
int  rightExtra 
) [read]

Definition at line 549 of file oldGff.c.

References gffGene::dna, gffGene::dnaSize, dupeSegmentList(), gffGene::end, gffGene::exons, fixDirectionAndOffsets(), geneDna(), gffFreeGene(), gffGene::introns, gffGene::name, needMem(), gffGene::next, slCount(), gffGene::start, and gffGene::strand.

Referenced by gffDupeGene().

00556 {
00557 struct gffGene *g;
00558 int intronCount = slCount(oldGene->introns);
00559 int exonCount = slCount(oldGene->exons);
00560 int memSize = sizeof(*g) + (intronCount + exonCount) * sizeof(struct gffSegment);
00561 char *memPt;
00562 int firstExonOffset;
00563 
00564 
00565 memPt = needMem(memSize);
00566 g = (struct gffGene *)memPt;
00567 memPt += sizeof(*g);
00568 g->exons = (struct gffSegment *)memPt;
00569 memPt += exonCount*sizeof(struct gffSegment);
00570 g->introns = (struct gffSegment *)memPt;
00571 
00572 g->next = NULL;
00573 g->start = oldGene->start;
00574 g->end = oldGene->end;
00575 g->strand = oldGene->strand;
00576 memcpy(g->name, oldGene->name, sizeof(g->name));
00577 g->exons = dupeSegmentList(oldGene->exons, g->exons);
00578 g->introns = dupeSegmentList(oldGene->introns, g->introns);
00579 if (!geneDna(gff, oldGene, leftExtra, rightExtra, 
00580     &g->dna, &g->dnaSize, &firstExonOffset))
00581     {
00582     gffFreeGene(&g);
00583     return NULL;
00584     }
00585 fixDirectionAndOffsets(g, g->dna, g->dnaSize, firstExonOffset);
00586 return g;
00587 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gffGene* gffFindGene ( struct gff *  gff,
char *  geneName 
) [read]

Definition at line 206 of file oldGff.c.

References gff::genes, gffGene::name, and gffGene::next.

Referenced by gffReadGenes().

00208 {
00209 struct gffGene *g;
00210 
00211 for (g=gff->genes; g!=NULL; g=g->next)
00212     {
00213     if (strcmp(geneName, g->name) == 0)
00214         return g;
00215     }
00216 return NULL;
00217 }

Here is the caller graph for this function:

struct gffGene* gffFindGeneIgnoreCase ( struct gff *  gff,
char *  geneName 
) [read]

Definition at line 219 of file oldGff.c.

References differentWord(), gff::genes, gffGene::name, and gffGene::next.

Referenced by gffGeneWithOwnDna().

00221 {
00222 struct gffGene *g;
00223 
00224 for (g=gff->genes; g!=NULL; g=g->next)
00225     {
00226     if (differentWord(geneName, g->name) == 0)
00227         return g;
00228     }
00229 return NULL;
00230 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gffFreeGene ( struct gffGene **  pGene  ) 

Definition at line 606 of file oldGff.c.

References gffGene::dna, and freeMem().

Referenced by gffDupeGeneAndSurrounds().

00611 {
00612 struct gffGene *g = *pGene;
00613 if (g == NULL)
00614     return;
00615 freeMem(g->dna);
00616 freeMem(g);
00617 *pGene = NULL;
00618 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gffGene* gffGeneWithOwnDna ( struct gff *  gff,
char *  geneName 
) [read]

Definition at line 595 of file oldGff.c.

References gffDupeGene(), and gffFindGeneIgnoreCase().

00597 {
00598 struct gffGene *oldGene;
00599 
00600 oldGene = gffFindGeneIgnoreCase(gff, geneName);
00601 if (oldGene == NULL)
00602     return NULL;
00603 return gffDupeGene(gff, oldGene);
00604 }

Here is the call graph for this function:

boolean gffOpen ( struct gff *  gff,
char *  fileName 
)

Definition at line 45 of file oldGff.c.

References _gffIdent, _gffSeekDoubleSharpLine(), ArraySize, dnaUtilOpen(), FALSE, fileSize(), lmInit(), TRUE, warn(), and zeroBytes().

Referenced by gffOpenAndRead(), and gffReadDnaSeq().

00047 {
00048     dnaUtilOpen();
00049 
00050     /* Initialize structure and open file. */
00051     zeroBytes(gff, sizeof(*gff));
00052     gff->memPool = lmInit(16*1024);
00053     gff->fileSize = fileSize(fileName);
00054     if (gff->fileSize < 0 ||
00055        (gff->file = fopen(fileName, "rb")) == NULL)
00056             {
00057             warn("Couldn't find the file named %s\n", fileName);
00058             return FALSE;
00059             }
00060     strcpy(gff->fileName, fileName);
00061     gff->bufSize = ArraySize(gff->buf);
00062 
00063     /* Make sure it's a gff file. */
00064     _gffSeekDoubleSharpLine(gff);
00065     if (strncmp(gff->buf, _gffIdent, strlen(_gffIdent)) != 0)
00066         {
00067         warn("%s doesn't appear to be a .gff file\n", fileName);
00068         return FALSE;
00069         }
00070 
00071     return TRUE;
00072 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean gffOpenAndRead ( struct gff *  gff,
char *  fileName 
)

Definition at line 637 of file oldGff.c.

References FALSE, gffClose(), gffOpen(), gffReadDna(), gffReadGenes(), and TRUE.

00639 {
00640 if (gffOpen(gff, fileName))
00641     if (gffReadDna(gff))
00642         if (gffReadGenes(gff))
00643             return TRUE;
00644 gffClose(gff);
00645 return FALSE;
00646 }

Here is the call graph for this function:

void gffPrintInfo ( struct gff *  gff,
FILE *  out 
)

Definition at line 280 of file oldGff.c.

References gff::dnaName, gff::dnaSize, gffGene::end, gffGene::exons, gff::fileName, gff::genes, gffGene::introns, gffGene::name, gffGene::next, slCount(), and gffGene::start.

00282 {
00283 struct gffGene *gene;
00284 
00285 fprintf(out, "\n%s\n", gff->fileName);
00286 fprintf(out, "DNA %s (%ld bases)\n", 
00287         gff->dnaName, gff->dnaSize);
00288 fprintf(out, "%d genes\n", slCount(gff->genes));
00289 for (gene = gff->genes; gene != NULL; gene = gene->next)
00290     {
00291     fprintf(out, "gene %s has %ld bases, %d exons, %d introns\n",
00292         gene->name, gene->end - gene->start + 1,
00293         slCount(gene->exons), slCount(gene->introns));
00294     }
00295 }

Here is the call graph for this function:

boolean gffReadDna ( struct gff *  gff  ) 

Definition at line 167 of file oldGff.c.

References _gffSeekDna(), gff::buf, gff::bytesInBuf, gff::dna, gff::dnaSize, FALSE, gff::fileSize, gffNextDnaLine(), ntChars, gff::readIx, TRUE, wantMem(), and warn().

Referenced by gffOpenAndRead(), and gffReadDnaSeq().

00169 {
00170 long dnaSize = 0;
00171 DNA *dna;
00172 DNA *line;
00173 int lineCount;
00174 DNA b;
00175 if (gff->dna != NULL)
00176         return TRUE; /* We already read it. */
00177 if (!_gffSeekDna(gff))
00178         return FALSE;
00179 if ((gff->dna = wantMem(gff->fileSize)) == NULL)
00180     {
00181     warn("Couldn't allocate %ld bytes for DNA\n",
00182         gff->fileSize);
00183     return FALSE;
00184     }
00185 dna = gff->dna;
00186 for (;;)
00187     {
00188     if (!gffNextDnaLine(gff))
00189         break;
00190     line = gff->buf + gff->readIx;
00191     lineCount = gff->bytesInBuf-gff->readIx;
00192     while (--lineCount >= 0)
00193         {
00194         b = *line++;
00195         if ((b = ntChars[(int)b]) != 0)
00196             {
00197             *dna++ = b;
00198             dnaSize += 1;
00199             }
00200         }
00201     }
00202 gff->dnaSize = dnaSize;
00203 return TRUE;
00204 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* gffReadDnaSeq ( char *  fileName  )  [read]

Definition at line 620 of file oldGff.c.

References gff::dna, gff::dnaName, gff::dnaSize, gffClose(), gffOpen(), gffReadDna(), and newDnaSeq().

00622 {
00623 struct gff gff;
00624 struct dnaSeq *seq = NULL;
00625 
00626 if (!gffOpen(&gff, fileName))
00627     return NULL;
00628 if (gffReadDna(&gff))
00629     {
00630     seq = newDnaSeq(gff.dna, gff.dnaSize, gff.dnaName);
00631     gff.dna = NULL;
00632     }
00633 gffClose(&gff);
00634 return seq;
00635 }

Here is the call graph for this function:

boolean gffReadGenes ( struct gff *  gff  ) 

Definition at line 310 of file oldGff.c.

References _gffGetLine(), gff::buf, checkWordCount(), differentWord(), gffSegLine::end, FALSE, gffSegLine::feature, gff::fileName, gffSegLine::frame, gff::genes, gffFindGene(), gffNeedMem(), gffSegLineScan(), gffSegLine::group, gff::lineNumber, slAddTail(), gffSegLine::start, gffSegLine::strand, TRUE, and warn().

Referenced by gffOpenAndRead().

00312 {
00313 int wordCount;
00314 struct gffSegLine seg;
00315 char curGroup[128];
00316 struct gffGene *gene = NULL;
00317 GffIntron *intron = NULL;
00318 GffExon *exon = NULL;
00319 boolean warnedUnknown = FALSE;
00320 boolean isNewGene;
00321 
00322 curGroup[0] = 0; /* Start off with no group */
00323 
00324 /* Line scanning loop. */
00325 for (;;)
00326     {
00327     /* Get next line and parse it into segLine data structure. */
00328     if (!_gffGetLine(gff)) 
00329         break;   /* End of file. */
00330     if (gff->buf[0] == '#')
00331         continue; /* Ignore sharp containing lines. */
00332     wordCount = gffSegLineScan(gff, &seg);
00333     if (wordCount < 9)
00334         continue; /* Ignore blank lines and short ones. */
00335 
00336     /* Make sure that start is less than or equal end. */
00337     if (seg.start > seg.end)
00338         {
00339         warn("start greater than end line %d of %s.\n",
00340                 gff->lineNumber, gff->fileName);
00341         return FALSE;
00342         }
00343 
00344     /* Get the gene we're working on.  First see if
00345      * it's the same as last time around. */
00346     isNewGene = FALSE;
00347     if (strcmp(seg.group, curGroup) != 0)
00348         {
00349         strcpy(curGroup, seg.group);
00350         if ((gene = gffFindGene(gff, seg.group)) == NULL)
00351             {
00352             /* It's a new gene! */
00353             if (!checkWordCount(gff, wordCount)) return FALSE;
00354             isNewGene = TRUE;
00355             gene = gffNeedMem(gff, sizeof(*gene));
00356             strcpy(gene->name, seg.group);
00357             slAddTail(&gff->genes, gene); 
00358             gene->strand = seg.strand[0];
00359             gene->frame = atoi(seg.frame);
00360             if (differentWord(seg.feature, "CDS") == 0)
00361                 {
00362                 gene->start = seg.start-1;
00363                 gene->end = seg.end-1;
00364                 }
00365             }
00366         }
00367 
00368     /* Look at what sort of feature it is, and decide what to do. */
00369 
00370     if (differentWord(seg.feature, "CDS")==0)
00371         {
00372         /* CDS (coding segments) have been processed already
00373          * for the most part. Here just make sure they aren't
00374          * duplicated. */
00375         if (!checkWordCount(gff, wordCount)) return FALSE;
00376         if (!isNewGene)
00377             {
00378             if (gene->start != 0 || gene->end != 0)
00379                 {
00380                 warn("Warning duplicate CDS for %s\n",
00381                         seg.group);
00382                 warn("Line %d of %s\n", 
00383                         gff->lineNumber, gff->fileName);
00384                 }
00385             }
00386         }
00387     else if (differentWord(seg.feature, "SE") == 0 
00388         ||   differentWord(seg.feature, "IE") == 0
00389         ||   differentWord(seg.feature, "FE") == 0
00390         ||   differentWord(seg.feature, "E") == 0
00391         ||   differentWord(seg.feature, "exon") == 0)
00392         {
00393         /* It's some sort of exon.  We'll deal with the complications
00394          * of it being possibly on the minus strand later, so can
00395          * tread initial, final, single, and regular exons the same
00396          * here. */
00397         if (!checkWordCount(gff, wordCount)) return FALSE;
00398         exon = gffNeedMem(gff, sizeof(*exon));
00399         exon->start = seg.start-1;
00400         exon->end = seg.end-1;
00401         exon->frame = atoi(seg.frame);
00402         gffSegmentInsertSort(&gene->exons, exon);
00403         }
00404     else if (differentWord(seg.feature, "I") == 0 
00405         ||   differentWord(seg.feature, "intron") == 0)
00406         {
00407         /* It's an intron. */
00408         if (!checkWordCount(gff, wordCount)) return FALSE;
00409         intron = gffNeedMem(gff, sizeof(*intron));
00410         intron->start = seg.start-1;
00411         intron->end = seg.end-1;
00412         intron->frame = atoi(seg.frame);
00413         gffSegmentInsertSort(&gene->introns, intron);
00414         }
00415     else if (strcmp(seg.feature, "IG")  == 0)
00416         {
00417         /* I don't know what it is, but we can ignore it. */
00418         }
00419     else
00420         {
00421         if (!warnedUnknown)
00422             {
00423             warn("Unknown feature %s line %d of %s, ignoring\n",
00424                     seg.feature,  gff->lineNumber, gff->fileName);
00425             warnedUnknown = TRUE;
00426             }
00427         }
00428     }
00429 
00430 /* Fix up gene length from exons if needed. */
00431 for (gene = gff->genes; gene != NULL; gene = gene->next)
00432     {
00433     if (gene->start >= gene->end)
00434         {
00435         offsetsFromExons(gene);
00436         }
00437     }
00438 return TRUE;
00439 }

Here is the call graph for this function:

Here is the caller graph for this function:


Generated on Tue Dec 25 19:09:07 2007 for blat by  doxygen 1.5.2