lib/gdf.c

Go to the documentation of this file.
00001 /* gdf - Intronerator Gene Description File. 
00002  *
00003  * This file is copyright 2002 Jim Kent, but license is hereby
00004  * granted for all use - public, private or commercial. */
00005 
00006 #include "common.h"
00007 #include "gdf.h"
00008 
00009 static char const rcsid[] = "$Id: gdf.c,v 1.4 2003/05/06 07:33:42 kate Exp $";
00010 
00011 struct gdfGene *newGdfGene(char *name, int nameSize, int exonCount, char strand, UBYTE chromIx)
00012 /* Return a new gene. */
00013 {
00014 struct gdfGene *gene = needMem(sizeof *gene);
00015 gene->name = cloneStringZ(name, nameSize);
00016 gene->dataCount = exonCount*2;
00017 if (exonCount > 0)
00018     {
00019     gene->dataPoints = 
00020             needMem(gene->dataCount * sizeof(gene->dataPoints[0]));
00021     }
00022 gene->strand = strand;
00023 gene->chromIx = chromIx;
00024 return gene;
00025 }
00026 
00027 void gdfFreeGene(struct gdfGene *gene)
00028 /* Free a gene. */
00029 {
00030 if (gene != NULL)
00031     {
00032     freeMem(gene->name);
00033     freeMem(gene->dataPoints);
00034     freeMem(gene);
00035     }
00036 }
00037 
00038 void gdfFreeGeneList(struct gdfGene **pList)
00039 /* Free a whole list of genes. */
00040 {
00041 struct gdfGene *gene, *next;
00042 gene = *pList;
00043 while (gene != NULL)
00044     {
00045     next = gene->next;
00046     gdfFreeGene(gene);
00047     gene = next;
00048     }
00049 *pList = NULL;
00050 }
00051 
00052 struct gdfGene *gdfReadOneGene(FILE *f)
00053 /* Read one entry from a Gdf file.  Assumes that the file pointer
00054  * is in the right place. */
00055 {
00056 short pointCount;
00057 char strand;
00058 UBYTE geneNameSize, chromIx;
00059 char geneNameBuf[128];
00060 struct gdfGene *gene;
00061 
00062 mustReadOne(f, geneNameSize);
00063 mustRead(f, geneNameBuf, geneNameSize);
00064 geneNameBuf[geneNameSize] = 0;
00065 mustReadOne(f, chromIx);
00066 mustReadOne(f, strand);
00067 mustReadOne(f, pointCount);
00068 gene = newGdfGene(geneNameBuf, geneNameSize, pointCount>>1, strand, chromIx);
00069 mustRead(f, gene->dataPoints, sizeof(gene->dataPoints[0]) * pointCount);
00070 return gene;
00071 }
00072 
00073 void gdfGeneExtents(struct gdfGene *gene, long *pMin, long *pMax)
00074 /* Figure out first and last base in gene. */
00075 {
00076 int i;
00077 long x;
00078 long min=0x7000000;
00079 long max = -min;
00080 
00081 for (i=0; i<gene->dataCount; i+=1)
00082     {
00083     x = gene->dataPoints[i].start;
00084     if (x < min)
00085         min = x;
00086     if (x > max)
00087         max = x;
00088     }
00089 *pMin = min;
00090 *pMax = max;
00091 }
00092 
00093 void gdfOffsetGene(struct gdfGene *gene, int offset)
00094 /* Add offset to each point in gene */
00095 {
00096 struct gdfDataPoint *dp = gene->dataPoints;
00097 int count = gene->dataCount;
00098 int i;
00099 for (i=0; i<count; ++i)
00100     dp[i].start += offset;
00101 }
00102 
00103 void gdfRcGene(struct gdfGene *gene, int size)
00104 /* Flip gene to other strand. Assumes dataPoints are already
00105  * moved into range from 0-size */
00106 {
00107 struct gdfDataPoint *s = gene->dataPoints, *e, temp;
00108 int count = gene->dataCount;
00109 int i;
00110 int halfCount = count/2;
00111 
00112 
00113 for (i=0; i<count; ++i)
00114     {
00115     s->start = reverseOffset(s->start, size) + 1;
00116     ++s;
00117     }
00118 s = gene->dataPoints;
00119 e = s + gene->dataCount-1;
00120 for (i=0; i<halfCount; i += 1)
00121     {
00122     memcpy(&temp, s, sizeof(temp));
00123     memcpy(s, e, sizeof(temp));
00124     memcpy(e, &temp, sizeof(temp));
00125     s += 1;
00126     e -= 1;
00127     }
00128 }
00129 
00130 
00131 void gdfUpcExons(struct gdfGene *gene, int geneOffset, DNA *dna, int dnaSize, int dnaOffset)
00132 /* Uppercase exons in DNA. */
00133 {
00134 struct gdfDataPoint *dp = gene->dataPoints;
00135 int count = gene->dataCount;
00136 int start, end;
00137 long gffStart, gffEnd;
00138 int combinedOffset;
00139 int i;
00140 
00141 gdfGeneExtents(gene, &gffStart, &gffEnd);
00142 combinedOffset = -gffStart + geneOffset - dnaOffset;
00143 for (i=0; i<count; i += 2)
00144     {
00145     start = dp[i].start + combinedOffset;
00146     end = dp[i+1].start + combinedOffset;
00147     if (end <= 0 || start >= dnaSize)
00148         continue;
00149     if (start < 0) start = 0;
00150     if (end > dnaSize) end = dnaSize;
00151     toUpperN(dna+start, end-start);
00152     }
00153 }
00154 

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2