00001
00002
00003
00004
00005 #include "common.h"
00006 #include "maf.h"
00007
00008 static char const rcsid[] = "$Id: mafScore.c,v 1.9 2005/11/23 17:20:50 braney Exp $";
00009
00010
00011 typedef struct gap_scores {
00012 int E;
00013 int O;
00014 } gap_scores_t;
00015
00016 #define CLEN(s) (sizeof((s))-1)
00017 #define NACHARS 128
00018 #define SS(c,d) ss[(uchar)c][(uchar)d]
00019 #define GAP(w,x,y,z) gop[(gtype[w]<<6)+(gtype[x]<<4)+(gtype[y]<<2)+gtype[z]]
00020 #define DASH '-'
00021
00022 typedef int ss_t[NACHARS][NACHARS];
00023 typedef unsigned char uchar;
00024
00025 static ss_t ss;
00026 static gap_scores_t ds;
00027 static int gop[256], gtype[128];
00028
00029 static const uchar nchars[] = "ACGT";
00030 static const int HOXD70_sym[4][4] = {
00031 { 91, -114, -31, -123 },
00032 {-114, 100, -125, -31 },
00033 { -31, -125, 100, -114 },
00034 {-123, -31, -114, 91 },
00035 };
00036
00037
00038 static void DNA_scores(ss_t ss)
00039 {
00040 int i, j, bad, a, b, A, B;
00041
00042 for (i = 0; i < NACHARS; ++i)
00043 for (j = 0; j < NACHARS; ++j)
00044 ss[i][j] = -100;
00045 for (i = 0; i < (signed)CLEN(nchars); ++i) {
00046 A = nchars[i];
00047 a = tolower(A);
00048 for (j = 0; j < (signed)CLEN(nchars); ++j) {
00049 B = nchars[j];
00050 b = tolower(B);
00051 ss[A][B] = ss[a][B] = ss[A][b] = ss[a][b] =
00052 HOXD70_sym[i][j];
00053 }
00054 }
00055 bad = -1000;
00056 for (i = 0; i < NACHARS; ++i)
00057 ss['X'][i] = ss[i]['X'] = ss['x'][i] = ss[i]['x'] = bad;
00058 }
00059
00060
00061 static void gap_costs(int *gop, int *gtype, int gap_open)
00062 {
00063 int i, X, D;
00064
00065 for (i = 0; i < 128; ++i)
00066 gtype[i] = 0;
00067 D = DASH;
00068 gtype[D] = 1;
00069
00070 for (i = 0; i < 256; ++i)
00071 gop[i] = 0;
00072 X = (uchar)'A';
00073 GAP(X,X,X,D) = gap_open;
00074 GAP(X,X,D,X) = gap_open;
00075 GAP(X,D,D,X) = gap_open;
00076 GAP(D,X,X,D) = gap_open;
00077 GAP(D,D,X,D) = gap_open;
00078 GAP(D,D,D,X) = gap_open;
00079 }
00080
00081 double mafScoreRangeMultiz(struct mafAli *maf, int start, int size)
00082
00083
00084
00085
00086
00087
00088
00089
00090 {
00091 uchar ai, ar, bi, br;
00092 int i;
00093 double score;
00094 struct mafComp *c1, *c2;
00095
00096 if (start < 0 || size <= 0 ||
00097 start+size > maf->textSize) {
00098 errAbort( "mafScoreRange: start = %d, size = %d, textSize = %d\n",
00099 start, size, maf->textSize);
00100 }
00101 if (ss['A']['A'] != HOXD70_sym[0][0]) {
00102 DNA_scores(ss);
00103 ds.E = 30;
00104 ds.O = 400;
00105 for (i = 0; i < 128; ++i)
00106 ss[i][DASH] = ss[DASH][i] = -ds.E;
00107 ss[DASH][DASH] = 0;
00108 gap_costs(gop, gtype, ds.O);
00109 }
00110 score = 0.0;
00111 for (i = start; i < start+size; ++i) {
00112 for (c1 = maf->components; c1 != NULL; c1 = c1->next) {
00113 if (c1->size == 0) continue;
00114 br = c1->text[i];
00115 for (c2 = c1->next; c2 != NULL; c2 = c2->next) {
00116 if (c2->size == 0) continue;
00117 bi = c2->text[i];
00118 score += SS(br, bi);
00119 if (i > 0) {
00120 ar = c1->text[i-1];
00121 ai = c2->text[i-1];
00122 score -= GAP(ar,ai,br,bi);
00123 }
00124 }
00125 }
00126 }
00127 return score;
00128 }
00129
00130 double mafScoreMultiz(struct mafAli *maf)
00131
00132
00133 {
00134 return mafScoreRangeMultiz(maf, 0, maf->textSize);
00135 }
00136
00137 double mafScoreMultizMaxCol(int species)
00138
00139 {
00140 int i, count = 0;
00141 for (i=1; i<species; ++i)
00142 count += i;
00143 return 100.0*count;
00144 }
00145
00146 void mafColMinMaxScore(struct mafAli *maf,
00147 double *retMin, double *retMax)
00148
00149 {
00150 *retMax = mafScoreMultizMaxCol(slCount(maf->components));
00151 *retMin = -*retMax;
00152 }
00153