00001
00002
00003 #include "common.h"
00004 #include "dnautil.h"
00005 #include "obscure.h"
00006 #include "fuzzyFind.h"
00007
00008 static char const rcsid[] = "$Id: ffScore.c,v 1.5 2003/10/21 22:06:32 kent Exp $";
00009
00010 int ffIntronMax = ffIntronMaxDefault;
00011
00012 void setFfIntronMax(int value)
00013 {
00014 ffIntronMax = value;
00015 }
00016
00017 int ffScoreMatch(DNA *a, DNA *b, int size)
00018
00019
00020
00021 {
00022 int i;
00023 int score = 0;
00024 for (i=0; i<size; ++i)
00025 {
00026 DNA aa = a[i];
00027 DNA bb = b[i];
00028 if (aa == 'n' || bb == 'n')
00029 continue;
00030 if (aa == bb)
00031 ++score;
00032 else
00033 score -= 1;
00034 }
00035 return score;
00036 }
00037
00038 int ffCalcCdnaGapPenalty(int hGap, int nGap)
00039
00040 {
00041 int acc = 2;
00042 if (hGap > 400000)
00043 {
00044 acc += (hGap - 400000)/3000;
00045 if (hGap > ffIntronMax)
00046 acc += (hGap - ffIntronMax)/2000;
00047 }
00048 if (hGap < 0)
00049 {
00050 hGap = -8*hGap;
00051 if (hGap > 48)
00052 hGap = (hGap*hGap);
00053 }
00054 if (nGap < 0)
00055 {
00056 acc += -nGap;
00057 nGap = 0;
00058 }
00059 acc += digitsBaseTwo(hGap)/2;
00060 if (nGap != 0)
00061 {
00062 acc += digitsBaseTwo(nGap);
00063 }
00064 else
00065 {
00066 if (hGap > 30)
00067 acc -= 1;
00068 }
00069 return acc;
00070 }
00071
00072 static int calcTightGap(int hGap, int nGap)
00073
00074 {
00075 if (hGap == 0 && nGap == 0)
00076 return 0;
00077 else
00078 {
00079 int overlap = min(hGap, nGap);
00080 int penalty = 8;
00081 if (overlap < 0)
00082 overlap = 0;
00083
00084 if (hGap < 0)
00085 hGap = -8*hGap;
00086 if (nGap < 0)
00087 nGap = -2*nGap;
00088 penalty += (hGap-overlap + nGap-overlap) + overlap;
00089 return penalty;
00090 }
00091 }
00092
00093 static int calcLooseGap(int hGap, int nGap)
00094
00095 {
00096 if (hGap == 0 && nGap == 0)
00097 return 0;
00098 else
00099 {
00100 int overlap = min(hGap, nGap);
00101 int penalty = 8;
00102 if (overlap < 0)
00103 overlap = 0;
00104
00105 if (hGap < 0)
00106 hGap = -8*hGap;
00107 if (nGap < 0)
00108 nGap = -2*nGap;
00109 penalty += log(hGap-overlap+1) + log(nGap-overlap+1);
00110 return penalty;
00111 }
00112 }
00113
00114
00115 int ffCalcGapPenalty(int hGap, int nGap, enum ffStringency stringency)
00116
00117 {
00118 switch (stringency)
00119 {
00120 case ffCdna:
00121 return ffCalcCdnaGapPenalty(hGap, nGap);
00122 case ffTight:
00123 return calcTightGap(hGap,nGap);
00124 case ffLoose:
00125 return calcLooseGap(hGap,nGap);
00126 default:
00127 errAbort("Unknown stringency type %d", stringency);
00128 return 0;
00129 }
00130 }
00131
00132
00133 int ffCdnaGapPenalty(struct ffAli *left, struct ffAli *right)
00134
00135 {
00136 int hGap = right->hStart - left->hEnd;
00137 int nGap = right->nStart - left->nEnd;
00138 return ffCalcCdnaGapPenalty(hGap, nGap);
00139 }
00140
00141 int ffGapPenalty(struct ffAli *left, struct ffAli *right, enum ffStringency stringency)
00142
00143 {
00144 int hGap = right->hStart - left->hEnd;
00145 int nGap = right->nStart - left->nEnd;
00146 return ffCalcGapPenalty(hGap, nGap, stringency);
00147 }
00148
00149 int ffScoreSomeAlis(struct ffAli *ali, int count, enum ffStringency stringency)
00150
00151 {
00152 int score = 0;
00153 int oneScore;
00154
00155 while (--count >= 0)
00156 {
00157 int len = ali->hEnd - ali->hStart;
00158 struct ffAli *right = ali->right;
00159 oneScore = dnaScoreMatch(ali->hStart, ali->nStart, len);
00160 score += oneScore;
00161 if (count > 0)
00162 score -= ffGapPenalty(ali, right,stringency);
00163 ali = right;
00164 }
00165 return score;
00166 }
00167
00168 int ffScoreSomething(struct ffAli *ali, enum ffStringency stringency,
00169 boolean isProt)
00170
00171 {
00172 int score = 0;
00173 int oneScore;
00174 int (*scoreMatch)(char *a, char *b, int size);
00175
00176 if (ali == NULL)
00177 return -0x7FFFFFFF;
00178 scoreMatch = (isProt ? aaScoreMatch : dnaScoreMatch );
00179 while (ali->left != NULL) ali = ali->left;
00180 while (ali != NULL)
00181 {
00182 int len = ali->hEnd - ali->hStart;
00183 struct ffAli *right = ali->right;
00184 oneScore = scoreMatch(ali->hStart, ali->nStart, len);
00185 score += oneScore;
00186 if (right)
00187 {
00188 score -= ffGapPenalty(ali, right, stringency);
00189 }
00190 ali = right;
00191 }
00192 return score;
00193 }
00194
00195 int ffScore(struct ffAli *ali, enum ffStringency stringency)
00196
00197 {
00198 return ffScoreSomething(ali, stringency, FALSE);
00199 }
00200
00201 int ffScoreCdna(struct ffAli *ali)
00202
00203
00204 {
00205 return ffScore(ali, ffCdna);
00206 }
00207
00208 int ffScoreProtein(struct ffAli *ali, enum ffStringency stringency)
00209
00210 {
00211 return ffScoreSomething(ali, stringency, TRUE);
00212 }
00213