lib/mafScore.c

Go to the documentation of this file.
00001 /* Score mafs and subsets of maf. 
00002  * This module is from Webb Miller at PSU. 
00003  * Some description of maf scoring is included in hgLoadMaf.c comments*/
00004 
00005 #include "common.h"
00006 #include "maf.h"
00007 
00008 static char const rcsid[] = "$Id: mafScore.c,v 1.9 2005/11/23 17:20:50 braney Exp $";
00009 
00010 
00011 typedef struct gap_scores {
00012         int E;
00013         int O;
00014 } gap_scores_t;
00015 
00016 #define CLEN(s) (sizeof((s))-1)
00017 #define NACHARS 128
00018 #define SS(c,d) ss[(uchar)c][(uchar)d]
00019 #define GAP(w,x,y,z) gop[(gtype[w]<<6)+(gtype[x]<<4)+(gtype[y]<<2)+gtype[z]]
00020 #define DASH '-'
00021 
00022 typedef int ss_t[NACHARS][NACHARS];
00023 typedef unsigned char uchar;
00024 
00025 static ss_t ss;
00026 static gap_scores_t ds;
00027 static int gop[256], gtype[128];
00028 
00029 static const uchar nchars[] = "ACGT";
00030 static const int HOXD70_sym[4][4] = {
00031   {  91, -114,  -31, -123 },
00032   {-114,  100, -125,  -31 },
00033   { -31, -125,  100, -114 },
00034   {-123,  -31, -114,   91 },
00035 };
00036 
00037 /* DNA_scores --------------------------  substitution scoring matrix for DNA */
00038 static void DNA_scores(ss_t ss)
00039 {
00040         int i, j, bad, a, b, A, B;
00041 
00042         for (i = 0; i < NACHARS; ++i)
00043                 for (j = 0; j < NACHARS; ++j)
00044                         ss[i][j] = -100;
00045         for (i = 0; i < (signed)CLEN(nchars); ++i) {
00046                 A = nchars[i];
00047                 a = tolower(A);
00048                 for (j = 0; j < (signed)CLEN(nchars); ++j) {
00049                         B = nchars[j];
00050                         b = tolower(B);
00051                         ss[A][B] = ss[a][B] = ss[A][b] = ss[a][b] =
00052                                 HOXD70_sym[i][j];
00053                 }
00054         }
00055         bad = -1000;
00056         for (i = 0; i < NACHARS; ++i)
00057                 ss['X'][i] = ss[i]['X'] = ss['x'][i] = ss[i]['x'] = bad;
00058 }
00059 
00060 
00061 static void gap_costs(int *gop, int *gtype, int gap_open)
00062 {
00063         int i, X, D;
00064 
00065         for (i = 0; i < 128; ++i)
00066                 gtype[i] = 0;
00067         D = DASH;
00068         gtype[D] = 1;
00069 
00070         for (i = 0; i < 256; ++i)
00071                 gop[i] = 0;
00072         X = (uchar)'A';
00073         GAP(X,X,X,D) = gap_open;
00074         GAP(X,X,D,X) = gap_open;
00075         GAP(X,D,D,X) = gap_open;
00076         GAP(D,X,X,D) = gap_open;
00077         GAP(D,D,X,D) = gap_open;
00078         GAP(D,D,D,X) = gap_open;
00079 }
00080 
00081 double mafScoreRangeMultiz(struct mafAli *maf, int start, int size)
00082 /* Return score of a subset of an alignment.  Parameters are:
00083  *    maf - the alignment
00084  *    start - the (zero based) offset to start calculating score
00085  *    size - the size of the subset
00086  * The following relationship should hold:
00087  *   scoreRange(maf,start,size) =
00088  *      scoreRange(maf,0,start+size) - scoreRange(maf,0,start)
00089  */
00090 {
00091 uchar ai, ar, bi, br;
00092 int i;
00093 double score;
00094 struct mafComp *c1, *c2;
00095 
00096 if (start < 0 || size <= 0 || 
00097     start+size > maf->textSize) {
00098         errAbort( "mafScoreRange: start = %d, size = %d, textSize = %d\n",
00099                 start, size, maf->textSize);
00100 }
00101 if (ss['A']['A'] != HOXD70_sym[0][0]) {
00102         DNA_scores(ss);
00103         ds.E = 30;
00104         ds.O = 400;
00105         for (i = 0; i < 128; ++i)
00106                 ss[i][DASH] = ss[DASH][i] = -ds.E;
00107         ss[DASH][DASH] = 0;
00108         gap_costs(gop, gtype, ds.O);   /* quasi-natural gap costs */
00109 }
00110 score = 0.0;
00111 for (i = start; i < start+size; ++i) {
00112         for (c1 = maf->components; c1 != NULL; c1 = c1->next) {
00113                 if (c1->size == 0) continue;
00114                 br = c1->text[i];
00115                 for (c2 = c1->next; c2 != NULL; c2 = c2->next) {
00116                         if (c2->size == 0) continue;
00117                         bi = c2->text[i];
00118                         score += SS(br, bi);
00119                         if (i > 0) {
00120                                 ar = c1->text[i-1];
00121                                 ai = c2->text[i-1];
00122                                 score -= GAP(ar,ai,br,bi);
00123                         }
00124                 }
00125         }
00126 }
00127 return score;
00128 }
00129 
00130 double mafScoreMultiz(struct mafAli *maf)
00131 /* Return score of a maf (calculated rather than what is
00132  * stored in the structure. */
00133 {
00134 return mafScoreRangeMultiz(maf, 0, maf->textSize);
00135 }
00136 
00137 double mafScoreMultizMaxCol(int species)
00138 /* Return maximum possible score for a column. */
00139 {
00140 int i, count = 0;
00141 for (i=1; i<species; ++i)
00142     count += i;
00143 return 100.0*count; 
00144 }
00145 
00146 void mafColMinMaxScore(struct mafAli *maf, 
00147         double *retMin, double *retMax)
00148 /* Get min/max maf scores for a column. */
00149 {
00150 *retMax = mafScoreMultizMaxCol(slCount(maf->components));
00151 *retMin = -*retMax;
00152 }
00153 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2