lib/ffScore.c

Go to the documentation of this file.
00001 /* ffScore - stuff to score ffAli alignments. */
00002 
00003 #include "common.h"
00004 #include "dnautil.h"
00005 #include "obscure.h"
00006 #include "fuzzyFind.h"
00007 
00008 static char const rcsid[] = "$Id: ffScore.c,v 1.5 2003/10/21 22:06:32 kent Exp $";
00009 
00010 int ffIntronMax = ffIntronMaxDefault;
00011 
00012 void setFfIntronMax(int value)
00013 {
00014 ffIntronMax = value;
00015 }
00016 
00017 int ffScoreMatch(DNA *a, DNA *b, int size)
00018 /* Compare two pieces of DNA base by base. Total mismatches are
00019  * subtracted from total matches and returned as score. 'N's 
00020  * neither hurt nor help score. */
00021 {
00022 int i;
00023 int score = 0;
00024 for (i=0; i<size; ++i)
00025     {
00026     DNA aa = a[i];
00027     DNA bb = b[i];
00028     if (aa == 'n' || bb == 'n')
00029         continue;
00030     if (aa == bb)
00031         ++score;
00032     else
00033         score -= 1;
00034     }
00035 return score;
00036 }
00037 
00038 int ffCalcCdnaGapPenalty(int hGap, int nGap)
00039 /* Return gap penalty for given h and n gaps. */
00040 {
00041 int acc = 2;
00042 if (hGap > 400000)      /* Discourage really long introns. */
00043     {
00044     acc += (hGap - 400000)/3000;
00045     if (hGap > ffIntronMax)
00046         acc += (hGap - ffIntronMax)/2000;
00047     }
00048 if (hGap < 0)   /* Discourage jumping back in haystack. */
00049     {
00050     hGap = -8*hGap;
00051     if (hGap > 48)
00052         hGap = (hGap*hGap);
00053     }
00054 if (nGap < 0)   /* Jumping back in needle gets rid of previous alignment. */
00055     {
00056     acc += -nGap;
00057     nGap = 0;
00058     }
00059 acc += digitsBaseTwo(hGap)/2;
00060 if (nGap != 0)
00061     {
00062     acc += digitsBaseTwo(nGap);
00063     }
00064 else
00065     {
00066     if (hGap > 30)
00067         acc -= 1;
00068     }
00069 return acc;
00070 }
00071 
00072 static int calcTightGap(int hGap, int nGap)
00073 /* Figure out gap penalty using tight model (gaps bad!) */
00074 {
00075 if (hGap == 0 && nGap == 0)
00076     return 0;
00077 else
00078     {
00079     int overlap = min(hGap, nGap);
00080     int penalty = 8;
00081     if (overlap < 0)
00082         overlap = 0;
00083 
00084     if (hGap < 0)
00085         hGap = -8*hGap;
00086     if (nGap < 0)
00087         nGap = -2*nGap;
00088     penalty += (hGap-overlap + nGap-overlap) + overlap;
00089     return penalty;
00090     }
00091 }
00092 
00093 static int calcLooseGap(int hGap, int nGap)
00094 /* Figure out gap penalty using loose model (gaps not so bad) */
00095 {
00096 if (hGap == 0 && nGap == 0)
00097     return 0;
00098 else
00099     {
00100     int overlap = min(hGap, nGap);
00101     int penalty = 8;
00102     if (overlap < 0)
00103         overlap = 0;
00104 
00105     if (hGap < 0)
00106         hGap = -8*hGap;
00107     if (nGap < 0)
00108         nGap = -2*nGap;
00109     penalty += log(hGap-overlap+1) + log(nGap-overlap+1);
00110     return penalty;
00111     }
00112 }
00113 
00114 
00115 int ffCalcGapPenalty(int hGap, int nGap, enum ffStringency stringency)
00116 /* Return gap penalty for given h and n gaps. */
00117 {
00118 switch (stringency)
00119     {
00120     case ffCdna:
00121         return ffCalcCdnaGapPenalty(hGap, nGap);
00122     case ffTight:
00123         return calcTightGap(hGap,nGap);
00124     case ffLoose:
00125         return calcLooseGap(hGap,nGap);
00126     default:
00127         errAbort("Unknown stringency type %d", stringency);
00128         return 0;
00129     }
00130 }
00131 
00132 
00133 int ffCdnaGapPenalty(struct ffAli *left, struct ffAli *right)
00134 /* What is penalty for gap between two. */
00135 {
00136 int hGap = right->hStart - left->hEnd;
00137 int nGap = right->nStart - left->nEnd;
00138 return ffCalcCdnaGapPenalty(hGap, nGap);
00139 }
00140 
00141 int ffGapPenalty(struct ffAli *left, struct ffAli *right, enum ffStringency stringency)
00142 /* What is penalty for gap between two given stringency? */
00143 {
00144 int hGap = right->hStart - left->hEnd;
00145 int nGap = right->nStart - left->nEnd;
00146 return ffCalcGapPenalty(hGap, nGap, stringency);
00147 }
00148 
00149 int ffScoreSomeAlis(struct ffAli *ali, int count, enum ffStringency stringency)
00150 /* Figure out score of count alis. */
00151 {
00152 int score = 0;
00153 int oneScore;
00154 
00155 while (--count >= 0)
00156     {
00157     int len = ali->hEnd - ali->hStart;
00158     struct ffAli *right = ali->right;
00159     oneScore = dnaScoreMatch(ali->hStart, ali->nStart, len);
00160     score += oneScore;
00161     if (count > 0)  /* Calculate gap penalty */
00162         score -= ffGapPenalty(ali, right,stringency);
00163     ali = right;
00164     }
00165 return score;
00166 }
00167 
00168 int ffScoreSomething(struct ffAli *ali, enum ffStringency stringency,
00169    boolean isProt)
00170 /* Score alignment. */
00171 {
00172 int score = 0;
00173 int oneScore;
00174 int (*scoreMatch)(char *a, char *b, int size);
00175 
00176 if (ali == NULL)
00177     return -0x7FFFFFFF;
00178 scoreMatch = (isProt ? aaScoreMatch : dnaScoreMatch );
00179 while (ali->left != NULL) ali = ali->left;
00180 while (ali != NULL)
00181     {
00182     int len = ali->hEnd - ali->hStart;
00183     struct ffAli *right = ali->right;
00184     oneScore = scoreMatch(ali->hStart, ali->nStart, len);
00185     score += oneScore;
00186     if (right)  /* Calculate gap penalty */
00187         {
00188         score -= ffGapPenalty(ali, right, stringency);
00189         }
00190     ali = right;
00191     }
00192 return score;
00193 }
00194 
00195 int ffScore(struct ffAli *ali, enum ffStringency stringency)
00196 /* Score alignment. */
00197 {
00198 return ffScoreSomething(ali, stringency, FALSE);
00199 }
00200 
00201 int ffScoreCdna(struct ffAli *ali)
00202 /* Figure out overall score of this alignment. 
00203  * Perfect match is number of bases in needle. */
00204 {
00205 return ffScore(ali, ffCdna);
00206 }
00207 
00208 int ffScoreProtein(struct ffAli *ali, enum ffStringency stringency)
00209 /* Figure out overall score of protein alignment. */
00210 {
00211 return ffScoreSomething(ali, stringency, TRUE);
00212 }
00213 

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2