00001
00002
00003
00004 #include "common.h"
00005 #include "dnautil.h"
00006 #include "dnaseq.h"
00007 #include "slog.h"
00008 #include "dnaMarkov.h"
00009
00010 static char const rcsid[] = "$Id: dnaMarkov.c,v 1.4 2005/04/10 14:41:22 markd Exp $";
00011
00012 void dnaMark0(struct dnaSeq *seqList, double mark0[5], int slogMark0[5])
00013
00014
00015
00016 {
00017 struct dnaSeq *seq;
00018 int histo[4];
00019 int oneHisto[4];
00020 double total;
00021 int i;
00022 double *freq = mark0+1;
00023
00024 zeroBytes(histo, sizeof(histo));
00025 for (seq = seqList; seq != NULL; seq = seq->next)
00026 {
00027 dnaBaseHistogram(seq->dna, seq->size, oneHisto);
00028 for (i=0; i<4; ++i)
00029 histo[i] += oneHisto[i];
00030 }
00031 total = histo[0] + histo[1] + histo[2] + histo[3];
00032 freq[-1] = 1.0;
00033 for (i=0; i<4; ++i)
00034 freq[i] = (double)histo[i] / total;
00035 if (slogMark0 != NULL)
00036 {
00037 int *slogFreq = slogMark0 + 1;
00038 slogFreq[-1] = 0;
00039 for (i=0; i<4; ++i)
00040 slogFreq[i] = slog(freq[i]);
00041 }
00042 }
00043
00044
00045 void dnaMark1(struct dnaSeq *seqList, double mark0[5], int slogMark0[5],
00046 double mark1[5][5], int slogMark1[5][5])
00047
00048
00049
00050 {
00051 struct dnaSeq *seq;
00052 DNA *dna, *endDna;
00053 int i,j;
00054 int histo[5][5];
00055 int hist1[5];
00056
00057 zeroBytes(histo, sizeof(histo));
00058 zeroBytes(hist1, sizeof(hist1));
00059 for (seq = seqList; seq != NULL; seq = seq->next)
00060 {
00061 dna = seq->dna;
00062 endDna = dna + seq->size-1;
00063 for (;dna < endDna; ++dna)
00064 {
00065 i = ntVal[(int)dna[0]];
00066 j = ntVal[(int)dna[1]];
00067 hist1[i+1] += 1;
00068 histo[i+1][j+1] += 1;
00069 }
00070 }
00071 for (i=0; i<5; ++i)
00072 {
00073 for (j=0; j<5; ++j)
00074 {
00075 double mark1Val;
00076 int matVal = histo[i][j] + 1;
00077 mark1Val = ((double)matVal)/(hist1[i]+5);
00078 mark1[i][j] = mark1Val;
00079 if (slogMark1 != NULL)
00080 slogMark1[i][j] = slog(mark1Val);
00081 }
00082 }
00083 for (i=0; i<5; ++i)
00084 {
00085 mark1[i][0] = 1;
00086 mark1[0][i] = mark0[i];
00087 if (slogMark1 != NULL)
00088 {
00089 slogMark1[i][0] = 0;
00090 slogMark1[0][i] = slogMark0[i];
00091 }
00092 }
00093 }
00094
00095 void dnaMarkTriple(struct dnaSeq *seqList,
00096 double mark0[5], int slogMark0[5],
00097 double mark1[5][5], int slogMark1[5][5],
00098 double mark2[5][5][5], int slogMark2[5][5][5],
00099 int offset, int advance, int earlyEnd)
00100
00101
00102
00103 {
00104 struct dnaSeq *seq;
00105 DNA *dna, *endDna;
00106 int i,j,k;
00107 int histo[5][5][5];
00108 int hist2[5][5];
00109 int total = 0;
00110 zeroBytes(histo, sizeof(histo));
00111 zeroBytes(hist2, sizeof(hist2));
00112 for (seq = seqList; seq != NULL; seq = seq->next)
00113 {
00114 dna = seq->dna;
00115 endDna = dna + seq->size - earlyEnd - 2;
00116 dna += offset;
00117 for (;dna < endDna; dna += advance)
00118 {
00119 i = ntVal[(int)dna[0]];
00120 j = ntVal[(int)dna[1]];
00121 k = ntVal[(int)dna[2]];
00122 hist2[i+1][j+1] += 1;
00123 histo[i+1][j+1][k+1] += 1;
00124 total += 1;
00125 }
00126 }
00127 for (i=0; i<5; ++i)
00128 {
00129 for (j=0; j<5; ++j)
00130 {
00131 for (k=0; k<5; ++k)
00132 {
00133 double markVal;
00134 int matVal = histo[i][j][k]+1;
00135 if (i == 0 || j == 0 || k == 0)
00136 {
00137 if (k == 0)
00138 {
00139 mark2[i][j][k] = 1;
00140 if (slogMark2 != NULL)
00141 slogMark2[i][j][k] = 0;
00142 }
00143 else if (j == 0)
00144 {
00145 mark2[i][j][k] = mark0[k];
00146 if (slogMark2 != NULL)
00147 slogMark2[i][j][k] = slogMark0[k];
00148 }
00149 else if (i == 0)
00150 {
00151 mark2[i][j][k] = mark1[j][k];
00152 if (slogMark2 != NULL)
00153 slogMark2[i][j][k] = slogMark1[j][k];
00154 }
00155 }
00156 else
00157 {
00158 markVal = ((double)matVal)/(hist2[i][j]+5);
00159 mark2[i][j][k] = markVal;
00160 if (slogMark2 != NULL)
00161 slogMark2[i][j][k] = slog(markVal);
00162 }
00163 }
00164 }
00165 }
00166 }
00167
00168 void dnaMark2(struct dnaSeq *seqList, double mark0[5], int slogMark0[5],
00169 double mark1[5][5], int slogMark1[5][5],
00170 double mark2[5][5][5], int slogMark2[5][5][5])
00171
00172
00173 {
00174 dnaMarkTriple(seqList, mark0, slogMark0, mark1, slogMark1,
00175 mark2, slogMark2, 0, 1, 0);
00176 }
00177