00001
00002
00003
00004
00005
00006 #include "common.h"
00007 #include "dnautil.h"
00008 #include "hmmstats.h"
00009 #include "codebias.h"
00010
00011 static char const rcsid[] = "$Id: codebias.c,v 1.6 2005/04/10 14:41:21 markd Exp $";
00012
00013
00014 struct codonBias *codonLoadBias(char *fileName)
00015
00016
00017 {
00018 struct codonBias *cb;
00019 char line[1024];
00020 int lineCount = 0;
00021 char *words[128];
00022 int wordCount;
00023 int i = 0, j = 0;
00024 int skip = 0;
00025 boolean getMark0 = FALSE;
00026 boolean getMark1 = FALSE;
00027 FILE *f = mustOpen(fileName, "r");
00028 int val;
00029
00030 AllocVar(cb);
00031 while (fgets(line, sizeof(line), f) )
00032 {
00033 ++lineCount;
00034 if (skip)
00035 {
00036 skip -= 1;
00037 continue;
00038 }
00039 if (getMark1)
00040 {
00041 wordCount = chopLine(line, words);
00042 if (wordCount != 65)
00043 errAbort("Bad line %d of %s\n", lineCount, fileName);
00044 for (j=0; j<64; ++j)
00045 {
00046 val = atoi(words[j+1]);
00047 if (val == 0)
00048 cb->mark1[i][j] = scaledLog(1.0E-20);
00049 else
00050 cb->mark1[i][j] = scaledLog(0.001*val);
00051 }
00052 if ((i += 1) == 64)
00053 getMark1 = FALSE;
00054 }
00055 else if (getMark0)
00056 {
00057 wordCount = chopLine(line, words);
00058 if (wordCount != 64)
00059 errAbort("Bad line %d of %s\n", lineCount, fileName);
00060 for (j=0; j<64; ++j)
00061 {
00062 val = atoi(words[j]);
00063 if (val == 0)
00064 cb->mark0[j] = scaledLog(1.0E-20);
00065 else
00066 cb->mark0[j] = scaledLog(0.001*val);
00067 }
00068 getMark0 = FALSE;
00069 }
00070 else if (startsWith("Markov", line))
00071 {
00072 wordCount = chopLine(line, words);
00073 if (wordCount != 2)
00074 errAbort("Bad line %d of %s\n", lineCount, fileName);
00075 if (sameString(words[1], "0"))
00076 getMark0 = TRUE;
00077 else if (sameString(words[1], "1"))
00078 getMark1 = TRUE;
00079 else
00080 errAbort("Bad line %d of %s\n", lineCount, fileName);
00081 skip = 3;
00082 }
00083 }
00084 fclose(f);
00085 return cb;
00086 }
00087
00088 static void unN(DNA *dna, int dnaSize)
00089
00090 {
00091 int i;
00092 int val;
00093 for (i=0; i<dnaSize; ++i)
00094 {
00095 if ((val = ntVal[(int)dna[i]]) < 0)
00096 dna[i] = 't';
00097 }
00098 }
00099
00100 int codonFindFrame(DNA *dna, int dnaSize, struct codonBias *forBias)
00101
00102
00103 {
00104 double logOneFourth = log(0.25);
00105 double logProbs[3];
00106 int frame;
00107 int dnaIx;
00108 double logP;
00109 double bestLogP = -0x6fffffff;
00110 int bestFrame = -1;
00111 int lastDnaStart = dnaSize-3;
00112 DNA *d;
00113 int codon = 0, lastCodon;
00114
00115 unN(dna, dnaSize);
00116 for (frame=0; frame<3; ++frame)
00117 {
00118
00119 logP = frame*logOneFourth;
00120
00121 if (frame <= lastDnaStart)
00122 {
00123 d = dna+frame;
00124 codon = (ntVal[(int)d[0]]<<4) + (ntVal[(int)d[1]]<<2) + ntVal[(int)d[2]];
00125 logP += forBias->mark0[codon];
00126 }
00127
00128 for (dnaIx = frame+3; dnaIx <= lastDnaStart; dnaIx += 3)
00129 {
00130 lastCodon = codon;
00131 d = dna+dnaIx;
00132 codon = (ntVal[(int)d[0]]<<4) + (ntVal[(int)d[1]]<<2) + ntVal[(int)d[2]];
00133 logP += forBias->mark1[lastCodon][codon];
00134 }
00135
00136 logP += (dnaSize-dnaIx)*logOneFourth;
00137 logProbs[frame] = logP;
00138 if (logP > bestLogP)
00139 {
00140 bestLogP = logP;
00141 bestFrame = frame;
00142 }
00143 }
00144 return bestFrame;
00145 }
00146
00147
00148