00001
00002
00003
00004
00005
00006
00007 #include "common.h"
00008 #include "dnautil.h"
00009 #include "memgfx.h"
00010 #include "sig.h"
00011 #include "fuzzyFind.h"
00012 #include "cda.h"
00013
00014 static char const rcsid[] = "$Id: cda.c,v 1.6 2003/05/06 07:33:41 kate Exp $";
00015
00016 char *cdaLoadString(FILE *f)
00017
00018 {
00019 UBYTE size;
00020 char *s;
00021 if (!readOne(f, size))
00022 return NULL;
00023 s = needMem(size+1);
00024 mustRead(f, s, size);
00025 return s;
00026 }
00027
00028 static void cdaWriteString(FILE *f, char *s)
00029
00030 {
00031 int size;
00032 UBYTE usize;
00033 if (s == NULL)
00034 s = "";
00035 size = strlen(s);
00036 usize = (UBYTE)size;
00037 assert(size < 256);
00038 writeOne(f, usize);
00039 mustWrite(f, s, size);
00040 }
00041
00042 void cdaReadBlock(FILE *f, struct cdaBlock *block)
00043
00044 {
00045 mustReadOne(f, block->nStart);
00046 mustReadOne(f, block->nEnd);
00047 mustReadOne(f, block->hStart);
00048 block->hEnd = block->hStart + (block->nEnd - block->nStart);
00049 mustReadOne(f, block->startGood);
00050 mustReadOne(f, block->endGood);
00051 mustReadOne(f, block->midScore);
00052 }
00053
00054 static void cdaWriteBlock(FILE *f, struct cdaBlock *block)
00055
00056 {
00057 writeOne(f, block->nStart);
00058 writeOne(f, block->nEnd);
00059 writeOne(f, block->hStart);
00060 writeOne(f, block->startGood);
00061 writeOne(f, block->endGood);
00062 writeOne(f, block->midScore);
00063 }
00064
00065 void cdaFixChromStartEnd(struct cdaAli *cda)
00066
00067
00068 {
00069 int start = 0x7fffffff;
00070 int end = -start;
00071 int count = cda->blockCount;
00072 struct cdaBlock *block = cda->blocks;
00073
00074 while (--count >= 0)
00075 {
00076 if (start > block->hStart)
00077 start = block->hStart;
00078 if (end < block->hEnd)
00079 end = block->hEnd;
00080 ++block;
00081 }
00082 cda->chromStart = start;
00083 cda->chromEnd = end;
00084 }
00085
00086 FILE *cdaOpenVerify(char *fileName)
00087
00088
00089 {
00090 FILE *aliFile;
00091 bits32 sig;
00092
00093 aliFile = mustOpen(fileName, "rb");
00094 mustReadOne(aliFile, sig);
00095 if (sig != aliSig)
00096 errAbort("Bad signature %x on %s", sig, fileName);
00097 return aliFile;
00098 }
00099
00100 static FILE *cdaCreate(char *fileName)
00101
00102 {
00103 bits32 sig = aliSig;
00104 FILE *f = mustOpen(fileName, "wb");
00105 writeOne(f, sig);
00106 return f;
00107 }
00108
00109 struct cdaAli *cdaLoadOne(FILE *f)
00110
00111 {
00112 struct cdaAli *ca;
00113 struct cdaBlock *blocks;
00114 int count;
00115 int i;
00116 char *name;
00117 UBYTE chromIx;
00118
00119 if ((name = cdaLoadString(f)) == NULL)
00120 return NULL;
00121 AllocVar(ca);
00122 ca->name = name;
00123 mustReadOne(f, ca->isEmbryonic);
00124 mustReadOne(f, ca->baseCount);
00125 mustReadOne(f, ca->milliScore);
00126 mustReadOne(f, chromIx);
00127 ca->chromIx = chromIx;
00128 mustReadOne(f, ca->strand);
00129 mustReadOne(f, ca->direction);
00130 mustReadOne(f, ca->blockCount);
00131 count = ca->blockCount;
00132 ca->blocks = blocks = needMem(count * sizeof(blocks[0]));
00133 for (i=0; i<count; ++i)
00134 cdaReadBlock(f, &blocks[i]);
00135 cdaFixChromStartEnd(ca);
00136 return ca;
00137 }
00138
00139 static void cdaWriteOne(FILE *f, struct cdaAli *ca)
00140
00141 {
00142 int count;
00143 int i;
00144 struct cdaBlock *blocks;
00145 UBYTE chromIx;
00146
00147 cdaWriteString(f, ca->name);
00148 writeOne(f, ca->isEmbryonic);
00149 writeOne(f, ca->baseCount);
00150 writeOne(f, ca->milliScore);
00151 if (ca->chromIx > 255)
00152 errAbort("chromIx too big in cdaWriteOne.");
00153 chromIx = (UBYTE)ca->chromIx;
00154 writeOne(f, chromIx);
00155 writeOne(f, ca->strand);
00156 writeOne(f, ca->direction);
00157 writeOne(f, ca->blockCount);
00158 count = ca->blockCount;
00159 blocks = ca->blocks;
00160 for (i=0; i<count; ++i)
00161 cdaWriteBlock(f, &blocks[i]);
00162 }
00163
00164 void cdaWrite(char *fileName, struct cdaAli *cdaList)
00165
00166 {
00167 FILE *f = cdaCreate(fileName);
00168 struct cdaAli *ca;
00169 for (ca = cdaList; ca != NULL; ca = ca->next)
00170 cdaWriteOne(f, ca);
00171 fclose(f);
00172 }
00173
00174 boolean cdaCloneIsReverse(struct cdaAli *cda)
00175
00176 {
00177 boolean isReverse = (cda->direction == '-');
00178 if (cda->strand == '-')
00179 isReverse = !isReverse;
00180 return isReverse;
00181 }
00182
00183 char cdaCloneStrand(struct cdaAli *cda)
00184
00185 {
00186 return cdaCloneIsReverse(cda) ? '-' : '+';
00187 }
00188
00189 char cdaDirChar(struct cdaAli *cda, char chromStrand)
00190
00191
00192 {
00193 boolean isReverse = cdaCloneIsReverse(cda);
00194 if (chromStrand == '-')
00195 isReverse = !isReverse;
00196 return (isReverse ? '<' : '>');
00197 }
00198
00199 void cdaRcOne(struct cdaAli *cda, int dnaStart, int baseCount)
00200
00201 {
00202 struct cdaBlock *block, *endBlock;
00203
00204 for (block = cda->blocks, endBlock = block+cda->blockCount; block < endBlock; ++block)
00205 {
00206 int temp;
00207 UBYTE uc;
00208 temp = reverseOffset(block->hStart-dnaStart, baseCount) + dnaStart + 1;
00209 block->hStart = reverseOffset(block->hEnd-dnaStart, baseCount) + dnaStart + 1;
00210 block->hEnd = temp;
00211 temp = reverseOffset(block->nStart, cda->baseCount);
00212 block->nStart = reverseOffset(block->nEnd, cda->baseCount);
00213 block->nEnd = temp;
00214 uc = block->startGood;
00215 block->startGood = block->endGood;
00216 block->endGood = uc;
00217 }
00218 block = cda->blocks;
00219 endBlock -= 1;
00220 for (;block < endBlock; ++block, --endBlock)
00221 {
00222 struct cdaBlock temp;
00223 temp = *block;
00224 *block = *endBlock;
00225 *endBlock = temp;
00226 }
00227 }
00228
00229 void cdaRcList(struct cdaAli *cdaList, int dnaStart, int baseCount)
00230
00231 {
00232 struct cdaAli *cda;
00233 for (cda = cdaList; cda != NULL; cda = cda->next)
00234 cdaRcOne(cda, dnaStart, baseCount);
00235 }
00236
00237
00238 void cdaFreeAli(struct cdaAli *ca)
00239
00240 {
00241 freeMem(ca->blocks);
00242 freeMem(ca->name);
00243 freeMem(ca);
00244 }
00245
00246 void cdaFreeAliList(struct cdaAli **pList)
00247
00248 {
00249 struct cdaAli *ca, *next;
00250 next = *pList;
00251 while ((ca = next) != NULL)
00252 {
00253 next = ca->next;
00254 cdaFreeAli(ca);
00255 }
00256 *pList = NULL;
00257 }
00258
00259
00260 static void cdaCoalesceOne(struct cdaAli *ca, boolean updateScore)
00261
00262 {
00263 struct cdaBlock *left = ca->blocks;
00264 struct cdaBlock *block = left+1;
00265 struct cdaBlock *writeBlock = block;
00266 int readCount = ca->blockCount;
00267 int i;
00268
00269
00270 for (i=1; i<readCount; ++i)
00271 {
00272 int hGap = intAbs(block->hStart - left->hEnd);
00273 int nGap = intAbs(block->nStart - left->nEnd);
00274 if (hGap > 2 || nGap > 2)
00275 {
00276 left = writeBlock;
00277 *writeBlock++ = *block;
00278 }
00279 else
00280 {
00281 int leftMatch, blockMatch;
00282 int totalMatch;
00283
00284
00285 if (updateScore)
00286 {
00287 leftMatch = roundingScale(left->midScore, left->nEnd-left->nStart, 255);
00288 blockMatch = roundingScale(block->midScore, block->nEnd-block->nStart, 255);
00289 totalMatch = leftMatch + blockMatch - nGap - hGap;
00290 left->midScore = roundingScale(totalMatch, 255, block->nEnd-left->nStart);
00291 }
00292
00293
00294 left->hEnd = block->hEnd;
00295 left->nEnd = block->nEnd;
00296 left->endGood = block->endGood;
00297 }
00298 ++block;
00299 }
00300 ca->blockCount = writeBlock - ca->blocks;
00301 }
00302
00303 void cdaCoalesceAll(struct cdaAli *ca, boolean updateScore)
00304
00305 {
00306 for (;ca != NULL; ca = ca->next)
00307 cdaCoalesceOne(ca, updateScore);
00308 }
00309
00310 void cdaCoalesceBlocks(struct cdaAli *ca)
00311
00312 {
00313 cdaCoalesceAll(ca, TRUE);
00314 }
00315
00316 void cdaCoalesceFast(struct cdaAli *ca)
00317
00318 {
00319 cdaCoalesceAll(ca, FALSE);
00320 }
00321
00322
00323 void cdaShowAlignmentTrack(struct memGfx *mg,
00324 int xOff, int yOff, int width, int height, Color goodColor, Color badColor,
00325 int dnaSize, int dnaOffset, struct cdaAli *cda, char repeatChar)
00326
00327 {
00328 struct cdaBlock *block = cda->blocks;
00329 int count = cda->blockCount;
00330 int blockIx;
00331 int scaledHeight = roundingScale(height, dnaSize, width);
00332 MgFont *font = NULL;
00333 int repeatCharWidth = 0;
00334
00335 if (repeatChar)
00336 {
00337 font = mgSmallFont();
00338 repeatCharWidth = mgFontCharWidth(font, repeatChar);
00339 }
00340
00341 for (blockIx = 0; blockIx < count; ++blockIx)
00342 {
00343 int x[4];
00344 int b[4];
00345 Color c[3];
00346 int quarterWid;
00347 int i;
00348 int w;
00349
00350 b[0] = block->hStart;
00351 b[3] = block->hEnd;
00352 quarterWid = (b[3]-b[0]+2)>>2;
00353 if (quarterWid > scaledHeight)
00354 quarterWid = scaledHeight;
00355 b[1] = b[0] + quarterWid;
00356 b[2] = b[3] - quarterWid;
00357
00358 c[0] = ((block->startGood >= 4 && (blockIx == 0 || block[-1].nEnd == block[0].nStart))
00359 ? goodColor : badColor);
00360 c[1] = ((block->midScore > 229) ? goodColor : badColor);
00361 c[2] = ((block->endGood >= 4 && (blockIx == count-1 || block[0].nEnd == block[1].nStart))
00362 ? goodColor : badColor);
00363
00364 for (i=0; i<4; ++i)
00365 x[i] = roundingScale(b[i]-dnaOffset, width, dnaSize) + xOff;
00366 for (i=0; i<3; ++i)
00367 {
00368 if ((w = x[i+1] - x[i]) > 0)
00369 mgDrawBox(mg, x[i], yOff, w, height, c[i]);
00370 }
00371
00372 if (repeatChar)
00373 {
00374 char buf[256];
00375 int charCount;
00376 w = x[3] - x[0];
00377 charCount = w/repeatCharWidth;
00378 if (charCount >= sizeof(buf))
00379 charCount = sizeof(buf)-1;
00380 memset(buf, repeatChar, charCount);
00381 buf[charCount] = 0;
00382 mgTextCentered(mg, x[0], yOff, w, height, MG_WHITE, font, buf);
00383 }
00384 ++block;
00385 }
00386 }
00387
00388
00389 static UBYTE leftGood(struct ffAli *ali)
00390 {
00391 DNA *n = ali->nStart;
00392 DNA *h = ali->hStart;
00393 int size = ali->nEnd - ali->nStart;
00394 int matchSize = 0;
00395
00396 while (--size >= 0)
00397 {
00398 if (*n++ != *h++)
00399 break;
00400 ++matchSize;
00401 }
00402 if (matchSize > 255)
00403 matchSize = 255;
00404 return (UBYTE)matchSize;
00405 }
00406
00407 static UBYTE rightGood(struct ffAli *ali)
00408 {
00409 DNA *n = ali->nEnd;
00410 DNA *h = ali->hEnd;
00411 int size = ali->nEnd - ali->nStart;
00412 int matchSize = 0;
00413
00414 while (--size >= 0)
00415 {
00416 if (*--n != *--h)
00417 break;
00418 ++matchSize;
00419 }
00420 if (matchSize > 255)
00421 matchSize = 255;
00422 return (UBYTE)matchSize;
00423 }
00424
00425
00426 static int countAlis(struct ffAli *ali)
00427 {
00428 int count = 0;
00429 while (ali != NULL)
00430 {
00431 ++count;
00432 ali = ali->right;
00433 }
00434 return count;
00435 }
00436
00437 struct cdaAli *cdaAliFromFfAli(struct ffAli *aliList,
00438 DNA *needle, int needleSize, DNA *hay, int haySize, boolean isRc)
00439
00440 {
00441 int count = countAlis(aliList);
00442 struct cdaAli *cda;
00443 struct cdaBlock *blocks;
00444 struct ffAli *fa;
00445 int score;
00446 int bases;
00447
00448 if (isRc)
00449 reverseComplement(needle, needleSize);
00450 AllocVar(cda);
00451 cda->baseCount = needleSize;
00452 cda->strand = (isRc ? '-' : '+');
00453 cda->direction = '+';
00454 cda->orientation = (isRc ? -1 : 1);
00455 cda->blockCount = count;
00456 cda->blocks = blocks = needMem(count * sizeof(blocks[0]));
00457
00458 for (fa = aliList; fa != NULL; fa = fa->right)
00459 {
00460 blocks->nStart = fa->nStart - needle;
00461 blocks->nEnd = fa->nEnd - needle;
00462 blocks->hStart = fa->hStart - hay;
00463 blocks->hEnd = fa->hEnd - hay;
00464 blocks->startGood = leftGood(fa);
00465 blocks->endGood = rightGood(fa);
00466 bases = fa->nEnd - fa->nStart;
00467 score = dnaScoreMatch(fa->nStart, fa->hStart, bases);
00468 blocks->midScore = roundingScale(255, score, bases);
00469 ++blocks;
00470 }
00471 cdaFixChromStartEnd(cda);
00472 if (isRc)
00473 {
00474 reverseComplement(needle, needleSize);
00475 }
00476 return cda;
00477 }
00478