lib/seqStats.c

Go to the documentation of this file.
00001 #include "common.h"
00002 #include "dnautil.h"
00003 #include "seqStats.h"
00004 
00005 double dnaMatchEntropy(DNA *query, DNA *target, int baseCount)
00006 /* Return entropy of matching bases - a number between 0 and 1, with
00007  * higher numbers the more diverse the matching bases. */
00008 {
00009 #define log4 1.386294
00010 #define invLog4 (1.0/log4)
00011 double p, q, e = 0, invTotal;
00012 int c, count[4], total;
00013 int i, qVal, tVal;
00014 count[0] = count[1] = count[2] = count[3] = 0;
00015 for (i=0; i<baseCount; ++i)
00016     {
00017     qVal = ntVal[(int)query[i]];
00018     tVal = ntVal[(int)target[i]];
00019     if (qVal == tVal && qVal >= 0)
00020         count[qVal] += 1;
00021     }
00022 total = count[0] + count[1] + count[2] + count[3];
00023 invTotal = 1.0/total;
00024 for (i=0; i<4; ++i)
00025     {
00026     if ((c = count[i]) > 0)
00027         {
00028         p = c * invTotal;
00029         q = log(p);
00030         e -= p*q;
00031         }
00032     }
00033 e *= invLog4;
00034 return e;
00035 }
00036 

Generated on Tue Dec 25 18:39:31 2007 for blat by doxygen 1.5.2