lib/oldGff.c File Reference

#include "common.h"
#include "dnautil.h"
#include "oldGff.h"
#include "dnaseq.h"
#include "htmshell.h"
#include "portable.h"
#include "localmem.h"

Include dependency graph for oldGff.c:

Go to the source code of this file.

Data Structures

struct  gffSegLine

Defines

#define errfile   stdout

Functions

static int gffSegLineScan (struct gff *gff, struct gffSegLine *seg)
static boolean _gffSeekDoubleSharpLine ()
boolean gffOpen (struct gff *gff, char *fileName)
void gffClose (struct gff *gff)
static boolean _gffGetLine (struct gff *gff)
static boolean _gffSeekDoubleSharpLine (struct gff *gff)
static boolean _gffSeekDna (struct gff *gff)
static boolean gffNextDnaLine (struct gff *gff)
boolean gffReadDna (struct gff *gff)
struct gffGene * gffFindGene (struct gff *gff, char *geneName)
struct gffGene * gffFindGeneIgnoreCase (struct gff *gff, char *geneName)
static void * gffNeedMem (struct gff *gff, int size)
static void gffSegmentInsertSort (struct gffSegment **plist, struct gffSegment *seg)
static void offsetsFromExons (struct gffGene *gene)
void gffPrintInfo (struct gff *gff, FILE *out)
static boolean checkWordCount (struct gff *gff, int wordCount)
boolean gffReadGenes (struct gff *gff)
static boolean geneDna (struct gff *gff, struct gffGene *gene, int leftExtra, int rightExtra, char **retDna, long *retDnaSize, int *retStartOffset)
static void fixDirectionAndOffsets (struct gffGene *gene, char *dna, long dnaSize, int newStart)
static struct gffSegment * dupeSegmentList (struct gffSegment *oldList, struct gffSegment *newList)
struct gffGene * gffDupeGeneAndSurrounds (struct gff *gff, struct gffGene *oldGene, int leftExtra, int rightExtra)
struct gffGene * gffDupeGene (struct gff *gff, struct gffGene *oldGene)
struct gffGene * gffGeneWithOwnDna (struct gff *gff, char *geneName)
void gffFreeGene (struct gffGene **pGene)
struct dnaSeq * gffReadDnaSeq (char *fileName)
boolean gffOpenAndRead (struct gff *gff, char *fileName)

Variables

static char const rcsid [] = "$Id: oldGff.c,v 1.6 2005/04/10 14:41:24 markd Exp $"
static char _gffIdent [] = "##gff-version"


Define Documentation

#define errfile   stdout

Definition at line 18 of file oldGff.c.


Function Documentation

static boolean _gffGetLine ( struct gff *  gff  )  [static]

Definition at line 84 of file oldGff.c.

References gff::buf, gff::bufSize, gff::bytesInBuf, FALSE, gff::file, gff::lineNumber, gff::readIx, and TRUE.

Referenced by _gffSeekDna(), _gffSeekDoubleSharpLine(), and gffReadGenes().

00087 { /* Read the next line of gff->file into gff->buf with fgets().
         * Returns FALSE when fgets() returns NULL, TRUE otherwise. */
00088 char *s;
00089 s = fgets(gff->buf, gff->bufSize, gff->file); /* Reads at most bufSize-1 characters. */
00090 if (s == NULL)
00091     {
00092     return FALSE;
00093     }
00094 gff->bytesInBuf = strlen(gff->buf); /* Length of the text just read, newline included if fgets stored one. */
00095 gff->readIx = 0; /* Consumers start again at the front of the buffer. */
00096 gff->lineNumber += 1; /* Counts successful reads; used in warning messages. */
00097 return TRUE;
00098 }

Here is the caller graph for this function:

static boolean _gffSeekDna ( struct gff *  gff  )  [static]

Definition at line 133 of file oldGff.c.

References _gffGetLine(), gff::buf, gff::bytesInBuf, gff::dnaName, FALSE, gff::file, and TRUE.

Referenced by gffReadDna().

00135 { /* Rewind gff->file and scan forward for the first line that begins with
         * the ##DNA marker. On a match the word after the marker is parsed into
         * gff->dnaName and TRUE is returned; FALSE is returned if _gffGetLine()
         * fails before any such line is found. */
00136 static char dnaIdent[] = "##DNA";
00137 
00138 rewind(gff->file); /* NOTE(review): gff->lineNumber is not reset here, so it keeps counting from its previous value. */
00139 for (;;)
00140     {
00141     if (!_gffGetLine(gff)) return FALSE;
00142     if (strncmp(gff->buf, dnaIdent, strlen(dnaIdent)) == 0) /* Prefix comparison only. */
00143         {
00144         sscanf(gff->buf, "##DNA %s", gff->dnaName); /* NOTE(review): %s has no field width and the size of gff->dnaName is not visible here -- confirm a long name cannot overflow it. */
00145         gff->bytesInBuf = 0; /* We're done with gff line. */
00146         return TRUE;
00147         }
00148     }
00149 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean _gffSeekDoubleSharpLine ( struct gff *  gff  )  [static]

Definition at line 121 of file oldGff.c.

References _gffGetLine(), gff::buf, gff::bytesInBuf, FALSE, and TRUE.

00123 { /* Read lines until one whose first two characters are both '#' is in
         * gff->buf. Returns TRUE with that line left in the buffer, or FALSE if
         * _gffGetLine() fails first. */
00124 for (;;)
00125     {
00126     if (!_gffGetLine(gff)) return FALSE;
00127     if (gff->bytesInBuf >= 2) /* Need at least two characters before testing buf[0] and buf[1]. */
00128         if (gff->buf[0] == '#' && gff->buf[1] == '#') 
00129                 return TRUE;
00130     }
00131 }

Here is the call graph for this function:

static boolean _gffSeekDoubleSharpLine (  )  [static]

Referenced by gffNextDnaLine(), and gffOpen().

Here is the caller graph for this function:

static boolean checkWordCount ( struct gff *  gff,
int  wordCount 
) [static]

Definition at line 297 of file oldGff.c.

References gff::buf, FALSE, gff::fileName, gff::lineNumber, TRUE, and warn().

Referenced by gffReadGenes().

00298 { /* Accept a parsed line only if at least nine fields were scanned.
         * Returns TRUE when wordCount >= 9; otherwise reports the offending
         * buffer contents and file position through warn() and returns FALSE. */
00299 if (wordCount >= 9)
00300     return TRUE;
00301 else
00302     {
00303     warn("???%s???\n", gff->buf); /* Echo the raw line that could not be handled. */
00304     warn("Can't handle line %d of %s.\n", 
00305             gff->lineNumber, gff->fileName);
00306     return FALSE;
00307     }
00308 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct gffSegment* dupeSegmentList ( struct gffSegment *  oldList,
struct gffSegment *  newList 
) [static, read]

Definition at line 533 of file oldGff.c.

References gffSegment::next.

Referenced by gffDupeGeneAndSurrounds().

00536 { /* Copy the linked list oldList into the array starting at newList, one
         * array slot per list node, and relink each copy to the following slot.
         * Returns NULL when oldList is empty, otherwise newList. The caller must
         * supply at least as many slots as oldList has nodes; nothing here
         * checks that. */
00537 struct gffSegment *oldEl, *newEl;
00538 
00539 if (oldList == NULL)
00540     return NULL;
00541 for (oldEl = oldList, newEl = newList; oldEl != NULL; oldEl=oldEl->next, newEl += 1)
00542     {
00543     memcpy(newEl, oldEl, sizeof(*newEl)); /* Copies the whole node, including the old next pointer... */
00544     newEl->next = ((oldEl->next == NULL) ? NULL : newEl+1); /* ...which is then replaced by the adjacent slot, or NULL at the tail. */
00545     }
00546 return newList;
00547 }

Here is the caller graph for this function:

static void fixDirectionAndOffsets ( struct gffGene *  gene,
char *  dna,
long  dnaSize,
int  newStart 
) [static]

Definition at line 483 of file oldGff.c.

References gffSegment::end, gffGene::end, gffGene::exons, gffGene::introns, gffSegment::next, reverseComplement(), reverseOffset(), slReverse(), gffSegment::start, gffGene::start, and gffGene::strand.

Referenced by gffDupeGeneAndSurrounds().

00488 { /* Rebase the gene and all of its introns and exons so that gene->start
         * becomes newStart. If the gene is on the '-' strand, also
         * reverse-complement dna in place, convert every coordinate with
         * reverseOffset(), reverse both segment lists and mark the gene '+'. */
00489 long oldStart;
00490 long offset;
00491 GffIntron *intron;
00492 GffExon *exon;
00493 long temp;
00494 
00495 oldStart = gene->start;
00496 offset = oldStart - newStart; /* Amount subtracted from every coordinate below. */
00497 gene->start -= offset;
00498 gene->end -= offset;
00499 for (intron = gene->introns; intron != NULL; intron = intron->next)
00500     {
00501     intron->start -= offset;
00502     intron->end -= offset;
00503     }
00504 for (exon = gene->exons; exon != NULL; exon = exon->next)
00505     {
00506     exon->start -= offset;
00507     exon->end -= offset;
00508     }
00509 if (gene->strand == '-')
00510     {
00511     reverseComplement(dna, dnaSize);
00512     temp = reverseOffset(gene->start, dnaSize); /* Converted old start becomes the new end; temp holds it across the swap. */
00513     gene->start = reverseOffset(gene->end, dnaSize);
00514     gene->end = temp;
00515     for (intron = gene->introns; intron != NULL; intron = intron->next)
00516         {
00517         temp = reverseOffset(intron->start, dnaSize);
00518         intron->start = reverseOffset(intron->end, dnaSize);
00519         intron->end = temp;
00520         }
00521     for (exon = gene->exons; exon != NULL; exon = exon->next)
00522         {
00523         temp = reverseOffset(exon->start, dnaSize);
00524         exon->start = reverseOffset(exon->end, dnaSize);
00525         exon->end = temp;
00526         }
00527     slReverse(&gene->introns); /* Presumably restores ascending order after the coordinate flip -- confirm. */
00528     slReverse(&gene->exons);
00529     gene->strand = '+'; /* Gene is now expressed on the forward strand of the flipped DNA. */
00530     }
00531 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean geneDna ( struct gff *  gff,
struct gffGene *  gene,
int  leftExtra,
int  rightExtra,
char **  retDna,
long *  retDnaSize,
int *  retStartOffset 
) [static]

Definition at line 441 of file oldGff.c.

References gff::dna, gffGene::dna, gff::dnaSize, gffGene::end, FALSE, needMem(), gffGene::start, and TRUE.

Referenced by gffDupeGeneAndSurrounds().

00445 { /* Copy the DNA covering gene, plus up to leftExtra/rightExtra flanking
         * bases, out of gff->dna into a fresh zero-terminated buffer.
         * On success stores the buffer in *retDna, its length (terminator
         * excluded) in *retDnaSize and the position of gene->start inside the
         * buffer in *retStartOffset, then returns TRUE. Returns FALSE, leaving
         * the outputs untouched, when the gene size is <= 0 or >= 1000000. */
00446 char *dna;
00447 char *pt;
00448 long geneSize;
00449 long i;
00450 long seqStart, seqEnd, seqSize;
00451 
00452 /* Filter out unreasonable looking genes - input to this
00453  * program isn't totally clean. */
00454 geneSize = gene->end - gene->start + 1; /* end is treated as inclusive. */
00455 if (geneSize <= 0 || geneSize >= 1000000)
00456     return FALSE;  
00457 
00458 /* Figure out extents of DNA we're going to return.
00459  * Return extra they ask for if possible, but clip
00460  * it to what is actually in GFF file. */
00461 seqStart = gene->start - leftExtra;
00462 seqEnd = gene->end + rightExtra + 1; /* One past the last base wanted. */
00463 if (seqStart < 0)
00464     seqStart = 0;
00465 if (seqEnd > gff->dnaSize)
00466     seqEnd = gff->dnaSize;
00467 seqSize = seqEnd - seqStart; /* NOTE(review): seqStart is never checked against gff->dnaSize, so this can be negative for a gene lying past the end of the DNA -- confirm callers rule that out. */
00468 
00469 /* Allocate memory and fetch the dna. */
00470 dna = needMem(seqSize+1); /* +1 for the terminating zero written below. */
00471 pt = dna;
00472 for (i=0; i<seqSize; i++)
00473     *pt++ = gff->dna[seqStart+i];
00474 *pt = 0;
00475 
00476 /* Report results back to caller. */
00477 *retDna = dna;
00478 *retDnaSize = seqSize;
00479 *retStartOffset = (gene->start - seqStart);
00480 return TRUE;
00481 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gffClose ( struct gff *  gff  ) 

Definition at line 74 of file oldGff.c.

References gff::dna, gff::file, freeMem(), lmCleanup(), gff::memPool, and zeroBytes().

Referenced by gffOpenAndRead(), and gffReadDnaSeq().

00076 { /* Release everything held by gff: close the file if one is open, free
         * the DNA buffer, clean up the memory pool, then zero the whole struct. */
00077 if (gff->file != NULL)
00078     fclose(gff->file);
00079 freeMem(gff->dna);
00080 lmCleanup(&gff->memPool);
00081 zeroBytes(gff, sizeof(*gff)); /* Leaves no stale pointers behind in the struct. */
00082 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gffGene* gffDupeGene ( struct gff *  gff,
struct gffGene *  oldGene 
) [read]

Definition at line 589 of file oldGff.c.

References gffDupeGeneAndSurrounds().

Referenced by gffGeneWithOwnDna().

00591 { /* Duplicate oldGene with no flanking DNA: forwards to
         * gffDupeGeneAndSurrounds() with both extras set to 0. */
00592 return gffDupeGeneAndSurrounds(gff, oldGene, 0, 0);
00593 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gffGene* gffDupeGeneAndSurrounds ( struct gff *  gff,
struct gffGene *  oldGene,
int  leftExtra,
int  rightExtra 
) [read]

Definition at line 549 of file oldGff.c.

References gffGene::dna, gffGene::dnaSize, dupeSegmentList(), gffGene::end, gffGene::exons, fixDirectionAndOffsets(), geneDna(), gffFreeGene(), gffGene::introns, gffGene::name, needMem(), gffGene::next, slCount(), gffGene::start, and gffGene::strand.

Referenced by gffDupeGene().

00556 { /* Build a standalone copy of oldGene together with its own DNA, including
         * up to leftExtra/rightExtra flanking bases. The gene struct and the
         * copies of its exon and intron segments live in one needMem() block.
         * Returns the new gene, or NULL if geneDna() rejects the gene. */
00557 struct gffGene *g;
00558 int intronCount = slCount(oldGene->introns);
00559 int exonCount = slCount(oldGene->exons);
00560 int memSize = sizeof(*g) + (intronCount + exonCount) * sizeof(struct gffSegment);
00561 char *memPt;
00562 int firstExonOffset;
00563 
00564 
00565 memPt = needMem(memSize); /* Layout: gene struct, then exon slots, then intron slots. */
00566 g = (struct gffGene *)memPt;
00567 memPt += sizeof(*g);
00568 g->exons = (struct gffSegment *)memPt;
00569 memPt += exonCount*sizeof(struct gffSegment);
00570 g->introns = (struct gffSegment *)memPt;
00571 
00572 g->next = NULL;
00573 g->start = oldGene->start;
00574 g->end = oldGene->end;
00575 g->strand = oldGene->strand;
00576 memcpy(g->name, oldGene->name, sizeof(g->name));
00577 g->exons = dupeSegmentList(oldGene->exons, g->exons); /* Becomes NULL when the old list is empty. */
00578 g->introns = dupeSegmentList(oldGene->introns, g->introns);
00579 if (!geneDna(gff, oldGene, leftExtra, rightExtra, 
00580     &g->dna, &g->dnaSize, &firstExonOffset))
00581     {
00582     gffFreeGene(&g); /* NOTE(review): geneDna() fails before writing g->dna, so this relies on needMem() having zeroed the block -- confirm. */
00583     return NULL;
00584     }
00585 fixDirectionAndOffsets(g, g->dna, g->dnaSize, firstExonOffset); /* firstExonOffset is where the gene starts inside the copied DNA. */
00586 return g;
00587 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gffGene* gffFindGene ( struct gff *  gff,
char *  geneName 
) [read]

Definition at line 206 of file oldGff.c.

References gff::genes, gffGene::name, and gffGene::next.

Referenced by gffReadGenes().

00208 { /* Linear search of gff->genes for a gene whose name equals geneName
         * exactly (strcmp). Returns the first match, or NULL if there is none. */
00209 struct gffGene *g;
00210 
00211 for (g=gff->genes; g!=NULL; g=g->next)
00212     {
00213     if (strcmp(geneName, g->name) == 0)
00214         return g;
00215     }
00216 return NULL;
00217 }

Here is the caller graph for this function:

struct gffGene* gffFindGeneIgnoreCase ( struct gff *  gff,
char *  geneName 
) [read]

Definition at line 219 of file oldGff.c.

References differentWord(), gff::genes, gffGene::name, and gffGene::next.

Referenced by gffGeneWithOwnDna().

00221 { /* Linear search of gff->genes using differentWord() instead of strcmp().
         * Returns the first gene for which differentWord() yields 0, or NULL.
         * Presumably the comparison ignores case, as the function name says;
         * differentWord() is defined elsewhere -- confirm. */
00222 struct gffGene *g;
00223 
00224 for (g=gff->genes; g!=NULL; g=g->next)
00225     {
00226     if (differentWord(geneName, g->name) == 0)
00227         return g;
00228     }
00229 return NULL;
00230 }

Here is the call graph for this function:

Here is the caller graph for this function:

void gffFreeGene ( struct gffGene **  pGene  ) 

Definition at line 606 of file oldGff.c.

References gffGene::dna, and freeMem().

Referenced by gffDupeGeneAndSurrounds().

00611 { /* Free the gene *pGene points to, along with its DNA buffer, and set
         * *pGene to NULL. Does nothing when *pGene is already NULL. */
00612 struct gffGene *g = *pGene;
00613 if (g == NULL)
00614     return;
00615 freeMem(g->dna);
00616 freeMem(g); /* Segments are not freed separately; gffDupeGeneAndSurrounds() places them in the same block as the gene. */
00617 *pGene = NULL;
00618 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct gffGene* gffGeneWithOwnDna ( struct gff *  gff,
char *  geneName 
) [read]

Definition at line 595 of file oldGff.c.

References gffDupeGene(), and gffFindGeneIgnoreCase().

00597 { /* Look geneName up with gffFindGeneIgnoreCase() and return a duplicate
         * made by gffDupeGene(). Returns NULL when no gene matches; the
         * duplicate itself may also be NULL if gffDupeGene() fails. */
00598 struct gffGene *oldGene;
00599 
00600 oldGene = gffFindGeneIgnoreCase(gff, geneName);
00601 if (oldGene == NULL)
00602     return NULL;
00603 return gffDupeGene(gff, oldGene);
00604 }

Here is the call graph for this function:

static void* gffNeedMem ( struct gff *  gff,
int  size 
) [static]

Definition at line 234 of file oldGff.c.

References lmAlloc(), and gff::memPool.

Referenced by gffReadGenes().

00235 { /* Allocate size bytes from the pool gff->memPool via lmAlloc(); gffClose()
         * hands that pool to lmCleanup(). */
00236 return lmAlloc(gff->memPool, size);
00237 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean gffNextDnaLine ( struct gff *  gff  )  [static]

Definition at line 151 of file oldGff.c.

References _gffSeekDoubleSharpLine(), gff::buf, gff::bytesInBuf, FALSE, and TRUE.

Referenced by gffReadDna().

00153 { /* Advance to the next double-sharp line and report whether it still
         * belongs to the DNA section. Returns FALSE when no such line can be
         * read or when the line begins with the ##end-DNA marker; otherwise
         * TRUE with the line left in gff->buf. */
00154 static char endIdent[] = "##end-DNA";
00155 
00156 if (!_gffSeekDoubleSharpLine(gff)) 
00157     return FALSE;
00158 /* Check to see if have reached end of DNA sequence */
00159 if (strncmp(gff->buf, endIdent, strlen(endIdent))==0)
00160     {
00161     gff->bytesInBuf = 0; /* We're done with gff line. */
00162     return FALSE;
00163     }
00164 return TRUE;
00165 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean gffOpen ( struct gff *  gff,
char *  fileName 
)

Definition at line 45 of file oldGff.c.

References _gffIdent, _gffSeekDoubleSharpLine(), ArraySize, dnaUtilOpen(), FALSE, fileSize(), lmInit(), TRUE, warn(), and zeroBytes().

Referenced by gffOpenAndRead(), and gffReadDnaSeq().

00047 { /* Initialise *gff, open fileName for reading and verify that its first
         * double-sharp line begins with _gffIdent. Returns TRUE on success;
         * on failure issues a warn() and returns FALSE.
         * NOTE(review): the FALSE paths return with the memory pool (and, on
         * the second path, the file) still held. gffOpenAndRead() calls
         * gffClose() afterwards, but gffReadDnaSeq() returns without doing so. */
00048     dnaUtilOpen();
00049 
00050     /* Initialize structure and open file. */
00051     zeroBytes(gff, sizeof(*gff));
00052     gff->memPool = lmInit(16*1024);
00053     gff->fileSize = fileSize(fileName);
00054     if (gff->fileSize < 0 ||
00055        (gff->file = fopen(fileName, "rb")) == NULL)
00056             {
00057             warn("Couldn't find the file named %s\n", fileName);
00058             return FALSE;
00059             }
00060     strcpy(gff->fileName, fileName); /* NOTE(review): unbounded copy; the size of gff->fileName is not visible here -- confirm long paths cannot overflow it. */
00061     gff->bufSize = ArraySize(gff->buf);
00062 
00063     /* Make sure it's a gff file. */
00064     _gffSeekDoubleSharpLine(gff); /* NOTE(review): return value ignored; if no such line exists the test below runs on whatever is left in gff->buf. */
00065     if (strncmp(gff->buf, _gffIdent, strlen(_gffIdent)) != 0)
00066         {
00067         warn("%s doesn't appear to be a .gff file\n", fileName);
00068         return FALSE;
00069         }
00070 
00071     return TRUE;
00072 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean gffOpenAndRead ( struct gff *  gff,
char *  fileName 
)

Definition at line 637 of file oldGff.c.

References FALSE, gffClose(), gffOpen(), gffReadDna(), gffReadGenes(), and TRUE.

00639 { /* Open fileName, then read its DNA and its genes. Returns TRUE only if
         * all three steps succeed; on any failure gffClose() is called on gff
         * and FALSE is returned. */
00640 if (gffOpen(gff, fileName))
00641     if (gffReadDna(gff))
00642         if (gffReadGenes(gff))
00643             return TRUE;
00644 gffClose(gff); /* Reached whenever one of the nested steps returned FALSE. */
00645 return FALSE;
00646 }

Here is the call graph for this function:

void gffPrintInfo ( struct gff *  gff,
FILE *  out 
)

Definition at line 280 of file oldGff.c.

References gff::dnaName, gff::dnaSize, gffGene::end, gffGene::exons, gff::fileName, gff::genes, gffGene::introns, gffGene::name, gffGene::next, slCount(), and gffGene::start.

00282 { /* Write a summary of gff to out: file name, DNA name and size, the gene
         * count, and one line per gene with its length and its exon and intron
         * counts. */
00283 struct gffGene *gene;
00284 
00285 fprintf(out, "\n%s\n", gff->fileName);
00286 fprintf(out, "DNA %s (%ld bases)\n", 
00287         gff->dnaName, gff->dnaSize);
00288 fprintf(out, "%d genes\n", slCount(gff->genes));
00289 for (gene = gff->genes; gene != NULL; gene = gene->next)
00290     {
00291     fprintf(out, "gene %s has %ld bases, %d exons, %d introns\n",
00292         gene->name, gene->end - gene->start + 1, /* end is inclusive, hence the +1. */
00293         slCount(gene->exons), slCount(gene->introns));
00294     }
00295 }

Here is the call graph for this function:

boolean gffReadDna ( struct gff *  gff  ) 

Definition at line 167 of file oldGff.c.

References _gffSeekDna(), gff::buf, gff::bytesInBuf, gff::dna, gff::dnaSize, FALSE, gff::fileSize, gffNextDnaLine(), ntChars, gff::readIx, TRUE, wantMem(), and warn().

Referenced by gffOpenAndRead(), and gffReadDnaSeq().

00169 { /* Load the DNA section of the file into gff->dna and set gff->dnaSize.
         * Returns TRUE if the DNA is already loaded or was read now; FALSE if
         * no DNA header line is found or the buffer cannot be allocated. */
00170 long dnaSize = 0;
00171 DNA *dna;
00172 DNA *line;
00173 int lineCount;
00174 DNA b;
00175 if (gff->dna != NULL)
00176         return TRUE; /* We already read it. */
00177 if (!_gffSeekDna(gff))
00178         return FALSE;
00179 if ((gff->dna = wantMem(gff->fileSize)) == NULL) /* Sized by the whole file; each stored base comes from one file byte. */
00180     {
00181     warn("Couldn't allocate %ld bytes for DNA\n",
00182         gff->fileSize);
00183     return FALSE;
00184     }
00185 dna = gff->dna;
00186 for (;;)
00187     {
00188     if (!gffNextDnaLine(gff)) /* Stops at the end-of-DNA marker or when no further double-sharp line can be read. */
00189         break;
00190     line = gff->buf + gff->readIx;
00191     lineCount = gff->bytesInBuf-gff->readIx;
00192     while (--lineCount >= 0)
00193         {
00194         b = *line++;
00195         if ((b = ntChars[(int)b]) != 0) /* Keep only characters that ntChars maps to a nonzero value. NOTE(review): if DNA is a signed char type, bytes above 0x7f would index negatively -- confirm. */
00196             {
00197             *dna++ = b;
00198             dnaSize += 1;
00199             }
00200         }
00201     }
00202 gff->dnaSize = dnaSize;
00203 return TRUE;
00204 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct dnaSeq* gffReadDnaSeq ( char *  fileName  )  [read]

Definition at line 620 of file oldGff.c.

References gff::dna, gff::dnaName, gff::dnaSize, gffClose(), gffOpen(), gffReadDna(), and newDnaSeq().

00622 { /* Open fileName as a GFF file, read its DNA and wrap it with newDnaSeq().
         * Returns the new sequence, or NULL if the file cannot be opened as GFF
         * or its DNA cannot be read. */
00623 struct gff gff;
00624 struct dnaSeq *seq = NULL;
00625 
00626 if (!gffOpen(&gff, fileName))
00627     return NULL; /* NOTE(review): returns without gffClose(), although gffOpen() may already have set up its pool and file -- confirm whether this leaks. */
00628 if (gffReadDna(&gff))
00629     {
00630     seq = newDnaSeq(gff.dna, gff.dnaSize, gff.dnaName);
00631     gff.dna = NULL; /* Stops gffClose() below from freeing the buffer just given to newDnaSeq(). */
00632     }
00633 gffClose(&gff);
00634 return seq;
00635 }

Here is the call graph for this function:

boolean gffReadGenes ( struct gff *  gff  ) 

Definition at line 310 of file oldGff.c.

References _gffGetLine(), gff::buf, checkWordCount(), differentWord(), gffSegLine::end, FALSE, gffSegLine::feature, gff::fileName, gffSegLine::frame, gff::genes, gffFindGene(), gffNeedMem(), gffSegLineScan(), gffSegLine::group, gff::lineNumber, slAddTail(), gffSegLine::start, gffSegLine::strand, TRUE, and warn().

Referenced by gffOpenAndRead().

00312 { /* Read feature lines from the current file position to end of file and
         * build gff->genes from them. Lines are grouped into genes by their
         * group field; exon and intron features are attached to the gene in
         * start order. Returns TRUE at end of file, or FALSE after a warn()
         * when a line has start > end or fails checkWordCount(). */
00313 int wordCount;
00314 struct gffSegLine seg;
00315 char curGroup[128];
00316 struct gffGene *gene = NULL;
00317 GffIntron *intron = NULL;
00318 GffExon *exon = NULL;
00319 boolean warnedUnknown = FALSE;
00320 boolean isNewGene;
00321 
00322 curGroup[0] = 0; /* Start off with no group */
00323 
00324 /* Line scanning loop. */
00325 for (;;)
00326     {
00327     /* Get next line and parse it into segLine data structure. */
00328     if (!_gffGetLine(gff)) 
00329         break;   /* End of file. */
00330     if (gff->buf[0] == '#')
00331         continue; /* Ignore sharp containing lines. */
00332     wordCount = gffSegLineScan(gff, &seg);
00333     if (wordCount < 9) /* Fewer than nine fields converted by the scan. */
00334         continue; /* Ignore blank lines and short ones. */
00335 
00336     /* Make sure that start is less than or equal end. */
00337     if (seg.start > seg.end)
00338         {
00339         warn("start greater than end line %d of %s.\n",
00340                 gff->lineNumber, gff->fileName);
00341         return FALSE;
00342         }
00343 
00344     /* Get the gene we're working on.  First see if
00345      * it's the same as last time around. */
00346     isNewGene = FALSE;
00347     if (strcmp(seg.group, curGroup) != 0)
00348         {
00349         strcpy(curGroup, seg.group); /* NOTE(review): unbounded copy into a 128-byte array; the size of seg.group is not visible here -- confirm it cannot exceed 128. */
00350         if ((gene = gffFindGene(gff, seg.group)) == NULL)
00351             {
00352             /* It's a new gene! */
00353             if (!checkWordCount(gff, wordCount)) return FALSE; /* Cannot fail here: lines with fewer than nine fields were skipped above. */
00354             isNewGene = TRUE;
00355             gene = gffNeedMem(gff, sizeof(*gene));
00356             strcpy(gene->name, seg.group);
00357             slAddTail(&gff->genes, gene); 
00358             gene->strand = seg.strand[0];
00359             gene->frame = atoi(seg.frame);
00360             if (differentWord(seg.feature, "CDS") == 0)
00361                 {
00362                 gene->start = seg.start-1; /* Stored as file value minus one; presumably 1-based to 0-based -- confirm. */
00363                 gene->end = seg.end-1;
00364                 }
00365             }
00366         }
00367 
00368     /* Look at what sort of feature it is, and decide what to do. */
00369 
00370     if (differentWord(seg.feature, "CDS")==0)
00371         {
00372         /* CDS (coding segments) have been processed already
00373          * for the most part. Here just make sure they aren't
00374          * duplicated. */
00375         if (!checkWordCount(gff, wordCount)) return FALSE;
00376         if (!isNewGene)
00377             {
00378             if (gene->start != 0 || gene->end != 0) /* An existing gene that already has coordinates. */
00379                 {
00380                 warn("Warning duplicate CDS for %s\n",
00381                         seg.group);
00382                 warn("Line %d of %s\n", 
00383                         gff->lineNumber, gff->fileName);
00384                 }
00385             }
00386         }
00387     else if (differentWord(seg.feature, "SE") == 0 
00388         ||   differentWord(seg.feature, "IE") == 0
00389         ||   differentWord(seg.feature, "FE") == 0
00390         ||   differentWord(seg.feature, "E") == 0
00391         ||   differentWord(seg.feature, "exon") == 0)
00392         {
00393         /* It's some sort of exon.  We'll deal with the complications
00394          * of it being possibly on the minus strand later, so can
00395          * treat initial, final, single, and regular exons the same
00396          * here. */
00397         if (!checkWordCount(gff, wordCount)) return FALSE;
00398         exon = gffNeedMem(gff, sizeof(*exon));
00399         exon->start = seg.start-1;
00400         exon->end = seg.end-1;
00401         exon->frame = atoi(seg.frame);
00402         gffSegmentInsertSort(&gene->exons, exon); /* Inserted in order of start. */
00403         }
00404     else if (differentWord(seg.feature, "I") == 0 
00405         ||   differentWord(seg.feature, "intron") == 0)
00406         {
00407         /* It's an intron. */
00408         if (!checkWordCount(gff, wordCount)) return FALSE;
00409         intron = gffNeedMem(gff, sizeof(*intron));
00410         intron->start = seg.start-1;
00411         intron->end = seg.end-1;
00412         intron->frame = atoi(seg.frame);
00413         gffSegmentInsertSort(&gene->introns, intron);
00414         }
00415     else if (strcmp(seg.feature, "IG")  == 0) /* NOTE(review): plain strcmp here, whereas every other feature test uses differentWord(). */
00416         {
00417         /* I don't know what it is, but we can ignore it. */
00418         }
00419     else
00420         {
00421         if (!warnedUnknown) /* Warn about the first unknown feature only. */
00422             {
00423             warn("Unknown feature %s line %d of %s, ignoring\n",
00424                     seg.feature,  gff->lineNumber, gff->fileName);
00425             warnedUnknown = TRUE;
00426             }
00427         }
00428     }
00429 
00430 /* Fix up gene length from exons if needed. */
00431 for (gene = gff->genes; gene != NULL; gene = gene->next)
00432     {
00433     if (gene->start >= gene->end) /* No usable extent was recorded from a CDS line. */
00434         {
00435         offsetsFromExons(gene);
00436         }
00437     }
00438 return TRUE;
00439 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int gffSegLineScan ( struct gff *  gff,
struct gffSegLine *  seg 
) [static]

Definition at line 34 of file oldGff.c.

References gff::buf, gffSegLine::end, gffSegLine::feature, gffSegLine::frame, gffSegLine::group, gffSegLine::score, gffSegLine::seqname, gffSegLine::source, gffSegLine::start, and gffSegLine::strand.

Referenced by gffReadGenes().

00035 { /* Split the line in gff->buf into the nine whitespace-separated fields of
         * *seg: seqname, source, feature, start, end, score, strand, frame and
         * group. Returns the sscanf() result, i.e. how many fields were
         * converted; callers treat fewer than 9 as an unusable line.
         * NOTE(review): every %s except the strand lacks a field width and the
         * sizes of the seg members are not visible here -- confirm long fields
         * cannot overflow them. */
00036     int scanned = sscanf(gff->buf, "%s %s %s %ld %ld %s %1s %s %s",
00037         seg->seqname, seg->source, seg->feature,
00038         &seg->start, &seg->end,
00039         seg->score, seg->strand, seg->frame, seg->group);
00040     return scanned;
00041 }

Here is the caller graph for this function:

static void gffSegmentInsertSort ( struct gffSegment **  plist,
struct gffSegment *  seg 
) [static]

Definition at line 239 of file oldGff.c.

References gffSegment::next, and gffSegment::start.

00242              :
00243          gffSegment **plist;     Pointer to list. 
00244          gffSegment *seg;        Segment to insert 
00245  */
00246 { /* Insert seg into the list *plist ahead of the first element whose start
         * is greater than seg->start, or at the tail if there is none. An
         * element with an equal start stays in front of the newcomer. */
00247 struct gffSegment *next;
00248 long segStart = seg->start;
00249 
00250 for (;;)
00251     {
00252     next = *plist;
00253     if (next == NULL)
00254         break; /* Reached the end of the list: append. */
00255     if (next->start > segStart)
00256         break; /* Found the first element that must follow seg. */
00257     plist = &(next->next); /* Move on to the link field of the element just passed. */
00258     }
00259 seg->next = next;   
00260 *plist = seg; /* Rewrites either the list head or the previous element's next field. */
00261 }

static void offsetsFromExons ( struct gffGene *  gene  )  [static]

Definition at line 263 of file oldGff.c.

References gffGene::end, gffSegment::end, gffGene::exons, gffSegment::next, gffGene::start, and gffSegment::start.

00265 { /* Set gene->start to the smallest exon start and gene->end to the largest
         * exon end found in gene->exons.
         * NOTE(review): with an empty exon list the gene is left with
         * start = 0x7fffffff and end = 0 -- confirm callers tolerate that. */
00266 GffExon *exon;
00267 long end = 0;
00268 long start = 0x7fffffff; /* I should use a .h file constant here... */
00269 for (exon = gene->exons; exon != NULL; exon = exon->next)
00270     {
00271     if (exon->start < start)
00272         start = exon->start;
00273     if (exon->end > end)
00274         end = exon->end;
00275     }
00276 gene->start = start;
00277 gene->end = end;
00278 }


Variable Documentation

char _gffIdent[] = "##gff-version" [static]

Definition at line 20 of file oldGff.c.

Referenced by gffOpen().

char const rcsid[] = "$Id: oldGff.c,v 1.6 2005/04/10 14:41:24 markd Exp $" [static]

Definition at line 16 of file oldGff.c.


Generated on Tue Dec 25 20:07:06 2007 for blat by  doxygen 1.5.2