This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
Data Structures | |
| struct | gff |
| struct | gffSegment |
| struct | gffGene |
Typedefs | |
| typedef gffSegment | GffIntron |
| typedef gffSegment | GffExon |
Functions | |
| boolean | gffOpen (struct gff *gff, char *fileName) |
| boolean | gffOpenAndRead (struct gff *gff, char *fileName) |
| void | gffClose (struct gff *gff) |
| boolean | gffReadDna (struct gff *gff) |
| gffGene * | gffFindGene (struct gff *gff, char *geneName) |
| gffGene * | gffFindGeneIgnoreCase (struct gff *gff, char *geneName) |
| void | gffPrintInfo (struct gff *gff, FILE *out) |
| boolean | gffReadGenes (struct gff *gff) |
| gffGene * | gffDupeGene (struct gff *gff, struct gffGene *oldGene) |
| gffGene * | gffDupeGeneAndSurrounds (struct gff *gff, struct gffGene *oldGene, int leftExtra, int rightExtra) |
| gffGene * | gffGeneWithOwnDna (struct gff *gff, char *geneName) |
| void | gffFreeGene (struct gffGene **pGene) |
| dnaSeq * | gffReadDnaSeq (char *fileName) |
| typedef struct gffSegment GffExon |
| typedef struct gffSegment GffIntron |
| void gffClose | ( | struct gff * | gff | ) |
Definition at line 74 of file oldGff.c.
References gff::dna, gff::file, freeMem(), lmCleanup(), gff::memPool, and zeroBytes().
Referenced by gffOpenAndRead(), and gffReadDnaSeq().
00076 { 00077 if (gff->file != NULL) 00078 fclose(gff->file); 00079 freeMem(gff->dna); 00080 lmCleanup(&gff->memPool); 00081 zeroBytes(gff, sizeof(*gff)); 00082 }
Here is the call graph for this function:

Here is the caller graph for this function:

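| struct gffGene* gffDupeGene | ( | struct gff * | gff, | |
| struct gffGene * | oldGene | |||
| ) | [read] |
Definition at line 589 of file oldGff.c.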
References gffDupeGeneAndSurrounds().
Referenced by gffGeneWithOwnDna().
00591 { 00592 return gffDupeGeneAndSurrounds(gff, oldGene, 0, 0); 00593 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct gffGene* gffDupeGeneAndSurrounds | ( | struct gff * | gff, | |
| struct gffGene * | oldGene, | |||
| int | leftExtra, | |||
| int | rightExtra | |||
| ) | [read] |
Definition at line 549 of file oldGff.c.
References gffGene::dna, gffGene::dnaSize, dupeSegmentList(), gffGene::end, gffGene::exons, fixDirectionAndOffsets(), geneDna(), gffFreeGene(), gffGene::introns, gffGene::name, needMem(), gffGene::next, slCount(), gffGene::start, and gffGene::strand.
Referenced by gffDupeGene().
00556 { 00557 struct gffGene *g; 00558 int intronCount = slCount(oldGene->introns); 00559 int exonCount = slCount(oldGene->exons); 00560 int memSize = sizeof(*g) + (intronCount + exonCount) * sizeof(struct gffSegment); 00561 char *memPt; 00562 int firstExonOffset; 00563 00564 00565 memPt = needMem(memSize); 00566 g = (struct gffGene *)memPt; 00567 memPt += sizeof(*g); 00568 g->exons = (struct gffSegment *)memPt; 00569 memPt += exonCount*sizeof(struct gffSegment); 00570 g->introns = (struct gffSegment *)memPt; 00571 00572 g->next = NULL; 00573 g->start = oldGene->start; 00574 g->end = oldGene->end; 00575 g->strand = oldGene->strand; 00576 memcpy(g->name, oldGene->name, sizeof(g->name)); 00577 g->exons = dupeSegmentList(oldGene->exons, g->exons); 00578 g->introns = dupeSegmentList(oldGene->introns, g->introns); 00579 if (!geneDna(gff, oldGene, leftExtra, rightExtra, 00580 &g->dna, &g->dnaSize, &firstExonOffset)) 00581 { 00582 gffFreeGene(&g); 00583 return NULL; 00584 } 00585 fixDirectionAndOffsets(g, g->dna, g->dnaSize, firstExonOffset); 00586 return g; 00587 }
Here is the call graph for this function:

Here is the caller graph for this function:

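| struct gffGene* gffFindGene | ( | struct gff * | gff, | |
| char * | geneName | |||
| ) | [read] |
Definition at line 206 of file oldGff.c.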
References gff::genes, gffGene::name, and gffGene::next.
Referenced by gffReadGenes().
00208 { 00209 struct gffGene *g; 00210 00211 for (g=gff->genes; g!=NULL; g=g->next) 00212 { 00213 if (strcmp(geneName, g->name) == 0) 00214 return g; 00215 } 00216 return NULL; 00217 }
Here is the caller graph for this function:

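| struct gffGene* gffFindGeneIgnoreCase | ( | struct gff * | gff, | |
| char * | geneName | |||
| ) | [read] |
Definition at line 219 of file oldGff.c.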
References differentWord(), gff::genes, gffGene::name, and gffGene::next.
Referenced by gffGeneWithOwnDna().
00221 { 00222 struct gffGene *g; 00223 00224 for (g=gff->genes; g!=NULL; g=g->next) 00225 { 00226 if (differentWord(geneName, g->name) == 0) 00227 return g; 00228 } 00229 return NULL; 00230 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gffFreeGene | ( | struct gffGene ** | pGene | ) |
Definition at line 606 of file oldGff.c.
References gffGene::dna, and freeMem().
Referenced by gffDupeGeneAndSurrounds().
00611 { 00612 struct gffGene *g = *pGene; 00613 if (g == NULL) 00614 return; 00615 freeMem(g->dna); 00616 freeMem(g); 00617 *pGene = NULL; 00618 }
Here is the call graph for this function:

Here is the caller graph for this function:

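| struct gffGene* gffGeneWithOwnDna | ( | struct gff * | gff, | |
| char * | geneName | |||
| ) | [read] |
Definition at line 595 of file oldGff.c.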
References gffDupeGene(), and gffFindGeneIgnoreCase().
00597 { 00598 struct gffGene *oldGene; 00599 00600 oldGene = gffFindGeneIgnoreCase(gff, geneName); 00601 if (oldGene == NULL) 00602 return NULL; 00603 return gffDupeGene(gff, oldGene); 00604 }
Here is the call graph for this function:

| boolean gffOpen | ( | struct gff * | gff, | |
| char * | fileName | |||
| ) |
Definition at line 45 of file oldGff.c.
References _gffIdent, _gffSeekDoubleSharpLine(), ArraySize, dnaUtilOpen(), FALSE, fileSize(), lmInit(), TRUE, warn(), and zeroBytes().
Referenced by gffOpenAndRead(), and gffReadDnaSeq().
00047 { 00048 dnaUtilOpen(); 00049 00050 /* Initialize structure and open file. */ 00051 zeroBytes(gff, sizeof(*gff)); 00052 gff->memPool = lmInit(16*1024); 00053 gff->fileSize = fileSize(fileName); 00054 if (gff->fileSize < 0 || 00055 (gff->file = fopen(fileName, "rb")) == NULL) 00056 { 00057 warn("Couldn't find the file named %s\n", fileName); 00058 return FALSE; 00059 } 00060 strcpy(gff->fileName, fileName); 00061 gff->bufSize = ArraySize(gff->buf); 00062 00063 /* Make sure it's a gff file. */ 00064 _gffSeekDoubleSharpLine(gff); 00065 if (strncmp(gff->buf, _gffIdent, strlen(_gffIdent)) != 0) 00066 { 00067 warn("%s doesn't appear to be a .gff file\n", fileName); 00068 return FALSE; 00069 } 00070 00071 return TRUE; 00072 }
Here is the call graph for this function:

Here is the caller graph for this function:

| boolean gffOpenAndRead | ( | struct gff * | gff, | |
| char * | fileName | |||
| ) |
Definition at line 637 of file oldGff.c.
References FALSE, gffClose(), gffOpen(), gffReadDna(), gffReadGenes(), and TRUE.
00639 { 00640 if (gffOpen(gff, fileName)) 00641 if (gffReadDna(gff)) 00642 if (gffReadGenes(gff)) 00643 return TRUE; 00644 gffClose(gff); 00645 return FALSE; 00646 }
Here is the call graph for this function:

| void gffPrintInfo | ( | struct gff * | gff, | |
| FILE * | out | |||
| ) |
Definition at line 280 of file oldGff.c.
References gff::dnaName, gff::dnaSize, gffGene::end, gffGene::exons, gff::fileName, gff::genes, gffGene::introns, gffGene::name, gffGene::next, slCount(), and gffGene::start.
00282 { 00283 struct gffGene *gene; 00284 00285 fprintf(out, "\n%s\n", gff->fileName); 00286 fprintf(out, "DNA %s (%ld bases)\n", 00287 gff->dnaName, gff->dnaSize); 00288 fprintf(out, "%d genes\n", slCount(gff->genes)); 00289 for (gene = gff->genes; gene != NULL; gene = gene->next) 00290 { 00291 fprintf(out, "gene %s has %ld bases, %d exons, %d introns\n", 00292 gene->name, gene->end - gene->start + 1, 00293 slCount(gene->exons), slCount(gene->introns)); 00294 } 00295 }
Here is the call graph for this function:

| boolean gffReadDna | ( | struct gff * | gff | ) |
Definition at line 167 of file oldGff.c.
References _gffSeekDna(), gff::buf, gff::bytesInBuf, gff::dna, gff::dnaSize, FALSE, gff::fileSize, gffNextDnaLine(), ntChars, gff::readIx, TRUE, wantMem(), and warn().
Referenced by gffOpenAndRead(), and gffReadDnaSeq().
00169 { 00170 long dnaSize = 0; 00171 DNA *dna; 00172 DNA *line; 00173 int lineCount; 00174 DNA b; 00175 if (gff->dna != NULL) 00176 return TRUE; /* We already read it. */ 00177 if (!_gffSeekDna(gff)) 00178 return FALSE; 00179 if ((gff->dna = wantMem(gff->fileSize)) == NULL) 00180 { 00181 warn("Couldn't allocate %ld bytes for DNA\n", 00182 gff->fileSize); 00183 return FALSE; 00184 } 00185 dna = gff->dna; 00186 for (;;) 00187 { 00188 if (!gffNextDnaLine(gff)) 00189 break; 00190 line = gff->buf + gff->readIx; 00191 lineCount = gff->bytesInBuf-gff->readIx; 00192 while (--lineCount >= 0) 00193 { 00194 b = *line++; 00195 if ((b = ntChars[(int)b]) != 0) 00196 { 00197 *dna++ = b; 00198 dnaSize += 1; 00199 } 00200 } 00201 } 00202 gff->dnaSize = dnaSize; 00203 return TRUE; 00204 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct dnaSeq* gffReadDnaSeq | ( | char * | fileName | ) | [read] |
Definition at line 620 of file oldGff.c.
References gff::dna, gff::dnaName, gff::dnaSize, gffClose(), gffOpen(), gffReadDna(), and newDnaSeq().
00622 { 00623 struct gff gff; 00624 struct dnaSeq *seq = NULL; 00625 00626 if (!gffOpen(&gff, fileName)) 00627 return NULL; 00628 if (gffReadDna(&gff)) 00629 { 00630 seq = newDnaSeq(gff.dna, gff.dnaSize, gff.dnaName); 00631 gff.dna = NULL; 00632 } 00633 gffClose(&gff); 00634 return seq; 00635 }
Here is the call graph for this function:

| boolean gffReadGenes | ( | struct gff * | gff | ) |
Definition at line 310 of file oldGff.c.
References _gffGetLine(), gff::buf, checkWordCount(), differentWord(), gffSegLine::end, FALSE, gffSegLine::feature, gff::fileName, gffSegLine::frame, gff::genes, gffFindGene(), gffNeedMem(), gffSegLineScan(), gffSegLine::group, gff::lineNumber, slAddTail(), gffSegLine::start, gffSegLine::strand, TRUE, and warn().
Referenced by gffOpenAndRead().
00312 { 00313 int wordCount; 00314 struct gffSegLine seg; 00315 char curGroup[128]; 00316 struct gffGene *gene = NULL; 00317 GffIntron *intron = NULL; 00318 GffExon *exon = NULL; 00319 boolean warnedUnknown = FALSE; 00320 boolean isNewGene; 00321 00322 curGroup[0] = 0; /* Start off with no group */ 00323 00324 /* Line scanning loop. */ 00325 for (;;) 00326 { 00327 /* Get next line and parse it into segLine data structure. */ 00328 if (!_gffGetLine(gff)) 00329 break; /* End of file. */ 00330 if (gff->buf[0] == '#') 00331 continue; /* Ignore sharp containing lines. */ 00332 wordCount = gffSegLineScan(gff, &seg); 00333 if (wordCount < 9) 00334 continue; /* Ignore blank lines and short ones. */ 00335 00336 /* Make sure that start is less than or equal end. */ 00337 if (seg.start > seg.end) 00338 { 00339 warn("start greater than end line %d of %s.\n", 00340 gff->lineNumber, gff->fileName); 00341 return FALSE; 00342 } 00343 00344 /* Get the gene we're working on. First see if 00345 * it's the same as last time around. */ 00346 isNewGene = FALSE; 00347 if (strcmp(seg.group, curGroup) != 0) 00348 { 00349 strcpy(curGroup, seg.group); 00350 if ((gene = gffFindGene(gff, seg.group)) == NULL) 00351 { 00352 /* It's a new gene! */ 00353 if (!checkWordCount(gff, wordCount)) return FALSE; 00354 isNewGene = TRUE; 00355 gene = gffNeedMem(gff, sizeof(*gene)); 00356 strcpy(gene->name, seg.group); 00357 slAddTail(&gff->genes, gene); 00358 gene->strand = seg.strand[0]; 00359 gene->frame = atoi(seg.frame); 00360 if (differentWord(seg.feature, "CDS") == 0) 00361 { 00362 gene->start = seg.start-1; 00363 gene->end = seg.end-1; 00364 } 00365 } 00366 } 00367 00368 /* Look at what sort of feature it is, and decide what to do. */ 00369 00370 if (differentWord(seg.feature, "CDS")==0) 00371 { 00372 /* CDS (coding segments) have been processed already 00373 * for the most part. Here just make sure they aren't 00374 * duplicated. 
*/ 00375 if (!checkWordCount(gff, wordCount)) return FALSE; 00376 if (!isNewGene) 00377 { 00378 if (gene->start != 0 || gene->end != 0) 00379 { 00380 warn("Warning duplicate CDS for %s\n", 00381 seg.group); 00382 warn("Line %d of %s\n", 00383 gff->lineNumber, gff->fileName); 00384 } 00385 } 00386 } 00387 else if (differentWord(seg.feature, "SE") == 0 00388 || differentWord(seg.feature, "IE") == 0 00389 || differentWord(seg.feature, "FE") == 0 00390 || differentWord(seg.feature, "E") == 0 00391 || differentWord(seg.feature, "exon") == 0) 00392 { 00393 /* It's some sort of exon. We'll deal with the complications 00394 * of it being possibly on the minus strand later, so can 00395 * tread initial, final, single, and regular exons the same 00396 * here. */ 00397 if (!checkWordCount(gff, wordCount)) return FALSE; 00398 exon = gffNeedMem(gff, sizeof(*exon)); 00399 exon->start = seg.start-1; 00400 exon->end = seg.end-1; 00401 exon->frame = atoi(seg.frame); 00402 gffSegmentInsertSort(&gene->exons, exon); 00403 } 00404 else if (differentWord(seg.feature, "I") == 0 00405 || differentWord(seg.feature, "intron") == 0) 00406 { 00407 /* It's an intron. */ 00408 if (!checkWordCount(gff, wordCount)) return FALSE; 00409 intron = gffNeedMem(gff, sizeof(*intron)); 00410 intron->start = seg.start-1; 00411 intron->end = seg.end-1; 00412 intron->frame = atoi(seg.frame); 00413 gffSegmentInsertSort(&gene->introns, intron); 00414 } 00415 else if (strcmp(seg.feature, "IG") == 0) 00416 { 00417 /* I don't know what it is, but we can ignore it. */ 00418 } 00419 else 00420 { 00421 if (!warnedUnknown) 00422 { 00423 warn("Unknown feature %s line %d of %s, ignoring\n", 00424 seg.feature, gff->lineNumber, gff->fileName); 00425 warnedUnknown = TRUE; 00426 } 00427 } 00428 } 00429 00430 /* Fix up gene length from exons if needed. 
*/ 00431 for (gene = gff->genes; gene != NULL; gene = gene->next) 00432 { 00433 if (gene->start >= gene->end) 00434 { 00435 offsetsFromExons(gene); 00436 } 00437 } 00438 return TRUE; 00439 }
Here is the call graph for this function:

Here is the caller graph for this function:

Generated by Doxygen 1.5.2