00001
00002
00003
00004 #include "common.h"
00005 #include "linefile.h"
00006 #include "gapCalc.h"
00007
00008 static char const rcsid[] = "$Id: gapCalc.c,v 1.4 2006/06/18 23:09:36 kate Exp $";
00009
00010 struct gapCalc
00011
00012
00013 {
00014 int smallSize;
00015 int *qSmall;
00016 int *tSmall;
00017 int *bSmall;
00018 int *longPos;
00019 double *qLong;
00020 double *tLong;
00021 double *bLong;
00022 int longCount;
00023 int qPosCount;
00024 int tPosCount;
00025 int bPosCount;
00026 int qLastPos;
00027 int tLastPos;
00028 int bLastPos;
00029 double qLastPosVal;
00030 double tLastPosVal;
00031 double bLastPosVal;
00032 double qLastSlope;
00033 double tLastSlope;
00034 double bLastSlope;
00035 };
00036
00037
00038 static char *originalGapCosts =
00039 "tableSize 11\n"
00040 "smallSize 111\n"
00041 "position 1 2 3 11 111 2111 12111 32111 72111 152111 252111\n"
00042 "qGap 350 425 450 600 900 2900 22900 57900 117900 217900 317900\n"
00043 "tGap 350 425 450 600 900 2900 22900 57900 117900 217900 317900\n"
00044 "bothGap 750 825 850 1000 1300 3300 23300 58300 118300 218300 318300\n";
00045
00046
00047
00048 static char *defaultGapCosts =
00049 "tablesize 11\n"
00050 "smallSize 111\n"
00051 "position 1 2 3 11 111 2111 12111 32111 72111 152111 252111\n"
00052 "qGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600\n"
00053 "tGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600\n"
00054 "bothGap 625 660 700 750 900 1400 4000 8000 16000 32000 57000\n";
00055
00056
00057 static char *rnaDnaGapCosts =
00058 "tablesize 12\n"
00059 "smallSize 111\n"
00060 "position 1 2 3 11 31 111 2111 12111 32111 72111 152111 252111\n"
00061 "qGap 325 360 400 450 600 800 1100 3600 7600 15600 31600 56600\n"
00062 "tGap 200 210 220 250 300 400 500 600 800 1200 2000 4000\n"
00063 " bothGap 625 660 700 750 900 1100 1400 4000 8000 16000 32000 57000\n";
00064
00065 static char *cheapGapCosts =
00066 "tableSize 3\n"
00067 "smallSize 100\n"
00068 "position 1 100 1000\n"
00069 "qGap 0 30 300\n"
00070 "tGap 0 30 300\n"
00071 "bothGap 0 30 300\n";
00072
00073
00074 char *gapCalcSampleFileContents()
00075
00076 {
00077 return defaultGapCosts;
00078 }
00079
00080 static int interpolate(int x, int *s, double *v, int sCount)
00081
00082
00083 {
00084 int i, ds, ss;
00085 double dv;
00086 for (i=0; i<sCount; ++i)
00087 {
00088 ss = s[i];
00089 if (x == ss)
00090 return v[i];
00091 else if (x < ss)
00092 {
00093 ds = ss - s[i-1];
00094 dv = v[i] - v[i-1];
00095 return v[i-1] + dv * (x - s[i-1]) / ds;
00096 }
00097 }
00098
00099 ds = s[sCount-1] - s[sCount-2];
00100 dv = v[sCount-1] - v[sCount-2];
00101 return v[sCount-2] + dv * (x - s[sCount-2]) / ds;
00102 }
00103
00104 static double calcSlope(double y2, double y1, double x2, double x1)
00105
00106 {
00107 return (y2-y1)/(x2-x1);
00108 }
00109
00110 static void readTaggedNumLine(struct lineFile *lf, char *tag,
00111 int count, int *intOut, double *floatOut)
00112
00113
00114
00115
00116 {
00117 char *line;
00118 int i = 0;
00119 char *word;
00120 if (!lineFileNextReal(lf, &line))
00121 lineFileUnexpectedEnd(lf);
00122 word = nextWord(&line);
00123 if (!sameWord(tag, word))
00124 errAbort("Expecting %s got %s line %d of %s",
00125 tag, word, lf->lineIx, lf->fileName);
00126 for (i = 0; i < count; ++i)
00127 {
00128 word = nextWord(&line);
00129 if (word == NULL)
00130 errAbort("Not enough numbers line %d of %s", lf->lineIx, lf->fileName);
00131 if (!isdigit(word[0]))
00132 errAbort("Expecting number got %s line %d of %s",
00133 word, lf->lineIx, lf->fileName);
00134 if (intOut)
00135 intOut[i] = atoi(word);
00136 if (floatOut)
00137 floatOut[i] = atof(word);
00138 }
00139 word = nextWord(&line);
00140 if (word != NULL)
00141 errAbort("Too many numbers line %d of %s", lf->lineIx, lf->fileName);
00142 }
00143
00144 struct gapCalc *gapCalcRead(struct lineFile *lf)
00145
00146 {
00147 int i, tableSize, startLong = -1;
00148 struct gapCalc *gapCalc;
00149 int *gapInitPos;
00150 double *gapInitQGap;
00151 double *gapInitTGap;
00152 double *gapInitBothGap;
00153
00154 AllocVar(gapCalc);
00155
00156
00157 readTaggedNumLine(lf, "tableSize", 1, &tableSize, NULL);
00158 readTaggedNumLine(lf, "smallSize", 1, &gapCalc->smallSize, NULL);
00159 AllocArray(gapInitPos,tableSize);
00160 AllocArray(gapInitQGap,tableSize);
00161 AllocArray(gapInitTGap,tableSize);
00162 AllocArray(gapInitBothGap,tableSize);
00163 readTaggedNumLine(lf, "position", tableSize, gapInitPos, NULL);
00164 readTaggedNumLine(lf, "qGap", tableSize, NULL, gapInitQGap);
00165 readTaggedNumLine(lf, "tGap", tableSize, NULL, gapInitTGap);
00166 readTaggedNumLine(lf, "bothGap", tableSize, NULL, gapInitBothGap);
00167
00168
00169 AllocArray(gapCalc->qSmall, gapCalc->smallSize);
00170 AllocArray(gapCalc->tSmall, gapCalc->smallSize);
00171 AllocArray(gapCalc->bSmall, gapCalc->smallSize);
00172 for (i=1; i<gapCalc->smallSize; ++i)
00173 {
00174 gapCalc->qSmall[i] =
00175 interpolate(i, gapInitPos, gapInitQGap, tableSize);
00176 gapCalc->tSmall[i] =
00177 interpolate(i, gapInitPos, gapInitTGap, tableSize);
00178 gapCalc->bSmall[i] = interpolate(i, gapInitPos,
00179 gapInitBothGap, tableSize);
00180 }
00181
00182
00183 for (i=0; i<tableSize; ++i)
00184 {
00185 if (gapCalc->smallSize == gapInitPos[i])
00186 {
00187 startLong = i;
00188 break;
00189 }
00190 }
00191 if (startLong < 0)
00192 errAbort("No position %d in gapCalcRead()\n", gapCalc->smallSize);
00193 gapCalc->longCount = tableSize - startLong;
00194 gapCalc->qPosCount = tableSize - startLong;
00195 gapCalc->tPosCount = tableSize - startLong;
00196 gapCalc->bPosCount = tableSize - startLong;
00197 gapCalc->longPos = cloneMem(gapInitPos + startLong, gapCalc->longCount * sizeof(int));
00198 gapCalc->qLong = cloneMem(gapInitQGap + startLong, gapCalc->qPosCount * sizeof(double));
00199 gapCalc->tLong = cloneMem(gapInitTGap + startLong, gapCalc->tPosCount * sizeof(double));
00200 gapCalc->bLong = cloneMem(gapInitBothGap + startLong, gapCalc->bPosCount * sizeof(double));
00201
00202
00203 gapCalc->qLastPos = gapCalc->longPos[gapCalc->qPosCount-1];
00204 gapCalc->tLastPos = gapCalc->longPos[gapCalc->tPosCount-1];
00205 gapCalc->bLastPos = gapCalc->longPos[gapCalc->bPosCount-1];
00206 gapCalc->qLastPosVal = gapCalc->qLong[gapCalc->qPosCount-1];
00207 gapCalc->tLastPosVal = gapCalc->tLong[gapCalc->tPosCount-1];
00208 gapCalc->bLastPosVal = gapCalc->bLong[gapCalc->bPosCount-1];
00209 gapCalc->qLastSlope = calcSlope(gapCalc->qLastPosVal, gapCalc->qLong[gapCalc->qPosCount-2],
00210 gapCalc->qLastPos, gapCalc->longPos[gapCalc->qPosCount-2]);
00211 gapCalc->tLastSlope = calcSlope(gapCalc->tLastPosVal, gapCalc->tLong[gapCalc->tPosCount-2],
00212 gapCalc->tLastPos, gapCalc->longPos[gapCalc->tPosCount-2]);
00213 gapCalc->bLastSlope = calcSlope(gapCalc->bLastPosVal, gapCalc->bLong[gapCalc->bPosCount-2],
00214 gapCalc->bLastPos, gapCalc->longPos[gapCalc->bPosCount-2]);
00215 freez(&gapInitPos);
00216 freez(&gapInitQGap);
00217 freez(&gapInitTGap);
00218 freez(&gapInitBothGap);
00219 return gapCalc;
00220 }
00221
00222 struct gapCalc *gapCalcFromString(char *s)
00223
00224 {
00225 struct lineFile *lf = lineFileOnString("string", TRUE, cloneString(s));
00226 struct gapCalc *gapCalc = gapCalcRead(lf);
00227 lineFileClose(&lf);
00228 return gapCalc;
00229 }
00230
00231 struct gapCalc *gapCalcFromFile(char *fileName)
00232
00233 {
00234 struct gapCalc *gapCalc = NULL;
00235
00236 if (sameString(fileName, "loose"))
00237 {
00238 verbose(2, "using loose linear gap costs (chicken/human)\n");
00239 gapCalc = gapCalcFromString(defaultGapCosts);
00240 }
00241 else if (sameString(fileName, "medium"))
00242 {
00243 verbose(2, "using medium (original) linear gap costs (mouse/human)\n");
00244 gapCalc = gapCalcFromString(originalGapCosts);
00245 }
00246 else
00247 {
00248 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00249 gapCalc = gapCalcRead(lf);
00250 lineFileClose(&lf);
00251 }
00252 return gapCalc;
00253 }
00254
00255 struct gapCalc *gapCalcDefault()
00256
00257 {
00258 return gapCalcFromString(defaultGapCosts);
00259 }
00260
00261 struct gapCalc *gapCalcRnaDna()
00262
00263 {
00264 return gapCalcFromString(rnaDnaGapCosts);
00265 }
00266
00267 struct gapCalc *gapCalcCheap()
00268
00269 {
00270 return gapCalcFromString(cheapGapCosts);
00271 }
00272
00273 struct gapCalc *gapCalcOriginal()
00274
00275 {
00276 return gapCalcFromString(originalGapCosts);
00277 }
00278
00279 void gapCalcFree(struct gapCalc **pGapCalc)
00280
00281 {
00282 struct gapCalc *gapCalc = *pGapCalc;
00283 if (gapCalc != NULL)
00284 {
00285 freeMem(gapCalc->qSmall);
00286 freeMem(gapCalc->tSmall);
00287 freeMem(gapCalc->bSmall);
00288 freeMem(gapCalc->longPos);
00289 freeMem(gapCalc->qLong);
00290 freeMem(gapCalc->tLong);
00291 freeMem(gapCalc->bLong);
00292 freez(pGapCalc);
00293 }
00294 }
00295
00296 int gapCalcCost(struct gapCalc *gapCalc, int dq, int dt)
00297
00298 {
00299 if (dt < 0) dt = 0;
00300 if (dq < 0) dq = 0;
00301 if (dt == 0)
00302 {
00303 if (dq < gapCalc->smallSize)
00304 return gapCalc->qSmall[dq];
00305 else if (dq >= gapCalc->qLastPos)
00306 return gapCalc->qLastPosVal + gapCalc->qLastSlope * (dq-gapCalc->qLastPos);
00307 else
00308 return interpolate(dq, gapCalc->longPos, gapCalc->qLong, gapCalc->qPosCount);
00309 }
00310 else if (dq == 0)
00311 {
00312 if (dt < gapCalc->smallSize)
00313 return gapCalc->tSmall[dt];
00314 else if (dt >= gapCalc->tLastPos)
00315 return gapCalc->tLastPosVal + gapCalc->tLastSlope * (dt-gapCalc->tLastPos);
00316 else
00317 return interpolate(dt, gapCalc->longPos, gapCalc->tLong, gapCalc->tPosCount);
00318 }
00319 else
00320 {
00321 int both = dq + dt;
00322 if (both < gapCalc->smallSize)
00323 return gapCalc->bSmall[both];
00324 else if (both >= gapCalc->bLastPos)
00325 return gapCalc->bLastPosVal + gapCalc->bLastSlope * (both-gapCalc->bLastPos);
00326 else
00327 return interpolate(both, gapCalc->longPos, gapCalc->bLong, gapCalc->bPosCount);
00328 }
00329 }
00330
00331 void gapCalcTest(struct gapCalc *gapCalc)
00332
00333 {
00334 int i;
00335 for (i=1; i<=10; i++)
00336 {
00337 verbose(1, "%d: %d %d %d\n", i, gapCalcCost(gapCalc, i, 0),
00338 gapCalcCost(gapCalc, 0, i), gapCalcCost(gapCalc, i/2, i-i/2));
00339 }
00340 for (i=1; ; i *= 10)
00341 {
00342 verbose(1, "%d: %d %d %d\n", i, gapCalcCost(gapCalc, i, 0), gapCalcCost(gapCalc, 0, i),
00343 gapCalcCost(gapCalc, i/2, i-i/2));
00344 if (i == 1000000000)
00345 break;
00346 }
00347 verbose(1, "%d %d cost %d\n", 6489540, 84240, gapCalcCost(gapCalc, 84240, 6489540));
00348 verbose(1, "%d %d cost %d\n", 2746361, 1075188, gapCalcCost(gapCalc, 1075188, 2746361));
00349 verbose(1, "%d %d cost %d\n", 6489540 + 2746361 + 72, 84240 + 1075188 + 72, gapCalcCost(gapCalc, 84240 + 1075188 + 72, 6489540 + 2746361 + 72));
00350 }
00351
00352