lib/cda.c

Go to the documentation of this file.
00001 /* cda.c - cDNA Alignment structure.  This stores all the info except
00002  * the bases themselves on an cDNA alignment. 
00003  * 
00004  * This file is copyright 2002 Jim Kent, but license is hereby
00005  * granted for all use - public, private or commercial. */
00006 
00007 #include "common.h"
00008 #include "dnautil.h"
00009 #include "memgfx.h"
00010 #include "sig.h"
00011 #include "fuzzyFind.h"
00012 #include "cda.h"
00013 
00014 static char const rcsid[] = "$Id: cda.c,v 1.6 2003/05/06 07:33:41 kate Exp $";
00015 
00016 char *cdaLoadString(FILE *f)
00017 /* Load in a string from CDA file. */
00018 {
00019 UBYTE size;
00020 char *s;
00021 if (!readOne(f, size))
00022     return NULL;
00023 s = needMem(size+1);
00024 mustRead(f, s, size);
00025 return s;
00026 }
00027 
00028 static void cdaWriteString(FILE *f, char *s)
00029 /* Write string out to CDA file. */
00030 {
00031 int size;
00032 UBYTE usize;
00033 if (s == NULL)
00034     s = "";
00035 size = strlen(s);
00036 usize = (UBYTE)size;
00037 assert(size < 256);
00038 writeOne(f, usize);
00039 mustWrite(f, s, size);
00040 }
00041 
00042 void cdaReadBlock(FILE *f, struct cdaBlock *block)
00043 /* Read one block from cda file. */
00044 {
00045 mustReadOne(f, block->nStart);
00046 mustReadOne(f, block->nEnd);
00047 mustReadOne(f, block->hStart);
00048 block->hEnd = block->hStart + (block->nEnd - block->nStart);
00049 mustReadOne(f, block->startGood);
00050 mustReadOne(f, block->endGood);
00051 mustReadOne(f, block->midScore);
00052 }
00053 
00054 static void cdaWriteBlock(FILE *f, struct cdaBlock *block)
00055 /* Write one block to cda file. */
00056 {
00057 writeOne(f, block->nStart);
00058 writeOne(f, block->nEnd);
00059 writeOne(f, block->hStart);
00060 writeOne(f, block->startGood);
00061 writeOne(f, block->endGood);
00062 writeOne(f, block->midScore);
00063 }
00064 
00065 void cdaFixChromStartEnd(struct cdaAli *cda)
00066 /* Loop through blocks and figure out and fill in chromStart
00067  * and chromEnd. */
00068 {
00069 int start = 0x7fffffff;
00070 int end = -start;
00071 int count = cda->blockCount;
00072 struct cdaBlock *block = cda->blocks;
00073 
00074 while (--count >= 0)
00075     {
00076     if (start > block->hStart)
00077         start = block->hStart;
00078     if (end < block->hEnd)
00079         end = block->hEnd;
00080     ++block;
00081     }
00082 cda->chromStart = start;
00083 cda->chromEnd = end;
00084 }
00085 
00086 FILE *cdaOpenVerify(char *fileName)
00087 /* Call this to open file and verify signature, then call cdaLoadOne
00088  * which returns NULL at EOF. */
00089 {
00090 FILE *aliFile;
00091 bits32 sig;
00092 
00093 aliFile = mustOpen(fileName, "rb");
00094 mustReadOne(aliFile, sig);
00095 if (sig != aliSig)
00096     errAbort("Bad signature %x on %s", sig, fileName);
00097 return aliFile;
00098 }
00099 
00100 static FILE *cdaCreate(char *fileName)
00101 /* Open file to write and put out signature. */
00102 {
00103 bits32 sig = aliSig;
00104 FILE *f = mustOpen(fileName, "wb");
00105 writeOne(f, sig);
00106 return f;
00107 }
00108 
00109 struct cdaAli *cdaLoadOne(FILE *f)
00110 /* Load one cdaAli from file.  Assumes file pointer is correctly positioned. */
00111 {
00112 struct cdaAli *ca;
00113 struct cdaBlock *blocks;
00114 int count;
00115 int i;
00116 char *name;
00117 UBYTE chromIx;
00118 
00119 if ((name = cdaLoadString(f)) == NULL)
00120     return NULL;
00121 AllocVar(ca);
00122 ca->name = name;
00123 mustReadOne(f, ca->isEmbryonic);
00124 mustReadOne(f, ca->baseCount);
00125 mustReadOne(f, ca->milliScore);
00126 mustReadOne(f, chromIx);
00127 ca->chromIx = chromIx;
00128 mustReadOne(f, ca->strand);
00129 mustReadOne(f, ca->direction);
00130 mustReadOne(f, ca->blockCount);
00131 count = ca->blockCount;
00132 ca->blocks = blocks = needMem(count * sizeof(blocks[0]));
00133 for (i=0; i<count; ++i)
00134     cdaReadBlock(f, &blocks[i]);
00135 cdaFixChromStartEnd(ca);
00136 return ca;
00137 }
00138 
00139 static void cdaWriteOne(FILE *f, struct cdaAli *ca)
00140 /* Write one cdaAli to file. */
00141 {
00142 int count;
00143 int i;
00144 struct cdaBlock *blocks;
00145 UBYTE chromIx;
00146 
00147 cdaWriteString(f, ca->name);
00148 writeOne(f, ca->isEmbryonic);
00149 writeOne(f, ca->baseCount);
00150 writeOne(f, ca->milliScore);
00151 if (ca->chromIx > 255)
00152     errAbort("chromIx too big in cdaWriteOne.");
00153 chromIx = (UBYTE)ca->chromIx;
00154 writeOne(f, chromIx);
00155 writeOne(f, ca->strand);
00156 writeOne(f, ca->direction);
00157 writeOne(f, ca->blockCount);
00158 count = ca->blockCount;
00159 blocks = ca->blocks;
00160 for (i=0; i<count; ++i)
00161     cdaWriteBlock(f, &blocks[i]);
00162 }
00163 
00164 void cdaWrite(char *fileName, struct cdaAli *cdaList)
00165 /* Write out a cdaList to a cda file. */
00166 {
00167 FILE *f = cdaCreate(fileName);
00168 struct cdaAli *ca;
00169 for (ca = cdaList; ca != NULL; ca = ca->next)
00170     cdaWriteOne(f, ca);
00171 fclose(f);
00172 }
00173 
00174 boolean cdaCloneIsReverse(struct cdaAli *cda)
00175 /* Returns TRUE if clone (.3/.5 pair) aligns on reverse strand. */
00176 {
00177 boolean isReverse = (cda->direction == '-');
00178 if (cda->strand == '-')
00179     isReverse = !isReverse;
00180 return isReverse;
00181 }
00182 
00183 char cdaCloneStrand(struct cdaAli *cda)
00184 /* Return '+' or '-' depending on the strand that clone (.3/.5 pair) aligns on. */
00185 {
00186 return cdaCloneIsReverse(cda) ? '-' : '+';
00187 }
00188 
00189 char cdaDirChar(struct cdaAli *cda, char chromStrand)
00190 /* Return '>' or '<' or ' ' depending whether cDNA is going same, opposite, or
00191  * unknown alignment as the chromosome strand. */
00192 {
00193 boolean isReverse = cdaCloneIsReverse(cda);
00194 if (chromStrand == '-')
00195     isReverse = !isReverse;
00196 return (isReverse ? '<' : '>');    
00197 }
00198 
00199 void cdaRcOne(struct cdaAli *cda, int dnaStart, int baseCount)
00200 /* Reverse complement one cda. DnaStart is typically display window start. */
00201 {
00202 struct cdaBlock *block, *endBlock;
00203 
00204 for (block = cda->blocks, endBlock = block+cda->blockCount; block < endBlock; ++block)
00205     {
00206     int temp;
00207     UBYTE uc;
00208     temp = reverseOffset(block->hStart-dnaStart, baseCount) + dnaStart + 1;
00209     block->hStart = reverseOffset(block->hEnd-dnaStart, baseCount) + dnaStart + 1;
00210     block->hEnd = temp;
00211     temp = reverseOffset(block->nStart, cda->baseCount);
00212     block->nStart = reverseOffset(block->nEnd, cda->baseCount);
00213     block->nEnd = temp;
00214     uc = block->startGood;
00215     block->startGood = block->endGood;
00216     block->endGood = uc;
00217     }
00218 block = cda->blocks;
00219 endBlock -= 1;
00220 for (;block < endBlock; ++block, --endBlock)
00221     {
00222     struct cdaBlock temp;
00223     temp = *block;
00224     *block = *endBlock;
00225     *endBlock = temp;
00226     }
00227 }
00228 
00229 void cdaRcList(struct cdaAli *cdaList, int dnaStart, int baseCount)
00230 /* Reverse complement cda list. */
00231 {
00232 struct cdaAli *cda;
00233 for (cda = cdaList; cda != NULL; cda = cda->next)
00234     cdaRcOne(cda, dnaStart, baseCount);
00235 }
00236 
00237 
00238 void cdaFreeAli(struct cdaAli *ca)
00239 /* Free a single cdaAli. */
00240 {
00241 freeMem(ca->blocks);
00242 freeMem(ca->name);
00243 freeMem(ca);
00244 }
00245 
00246 void cdaFreeAliList(struct cdaAli **pList)
00247 /* Free list of cdaAli. */
00248 {
00249 struct cdaAli *ca, *next;
00250 next = *pList;
00251 while ((ca = next) != NULL)
00252     {
00253     next = ca->next;
00254     cdaFreeAli(ca);
00255     }
00256 *pList = NULL;
00257 }
00258 
00259 
00260 static void cdaCoalesceOne(struct cdaAli *ca, boolean updateScore)
00261 /* Coalesce blocks separated by small amounts of noise. */
00262 {
00263 struct cdaBlock *left = ca->blocks;
00264 struct cdaBlock *block = left+1;
00265 struct cdaBlock *writeBlock = block;
00266 int readCount = ca->blockCount;
00267 int i;
00268 
00269 /* Implicitly have read and written first one. */
00270 for (i=1; i<readCount; ++i)
00271     {
00272     int hGap = intAbs(block->hStart - left->hEnd);
00273     int nGap = intAbs(block->nStart - left->nEnd);
00274     if (hGap > 2 || nGap > 2)
00275         {
00276         left = writeBlock;
00277         *writeBlock++ = *block;
00278         }
00279     else
00280         {
00281         int leftMatch, blockMatch;
00282         int totalMatch;
00283 
00284         /* Update score. */
00285         if (updateScore)
00286             {
00287             leftMatch = roundingScale(left->midScore, left->nEnd-left->nStart, 255);
00288             blockMatch = roundingScale(block->midScore, block->nEnd-block->nStart, 255);
00289             totalMatch = leftMatch + blockMatch - nGap - hGap;
00290             left->midScore = roundingScale(totalMatch, 255, block->nEnd-left->nStart);
00291             }
00292 
00293         /* Update ends. */
00294         left->hEnd = block->hEnd;
00295         left->nEnd = block->nEnd;
00296         left->endGood = block->endGood;  
00297         }
00298     ++block;
00299     }
00300 ca->blockCount = writeBlock - ca->blocks;
00301 }
00302 
00303 void cdaCoalesceAll(struct cdaAli *ca, boolean updateScore)
00304 /* Coalesce blocks separated by small amounts of noise. */
00305 {
00306 for (;ca != NULL; ca = ca->next)
00307     cdaCoalesceOne(ca, updateScore);
00308 }
00309 
00310 void cdaCoalesceBlocks(struct cdaAli *ca)
00311 /* Coalesce blocks separated by small amounts of noise. */
00312 {
00313 cdaCoalesceAll(ca, TRUE);
00314 }
00315 
00316 void cdaCoalesceFast(struct cdaAli *ca)
00317 /* Coalesce blocks as above, but don't update the score. */
00318 {
00319 cdaCoalesceAll(ca, FALSE);
00320 }
00321 
00322 
00323 void cdaShowAlignmentTrack(struct memGfx *mg, 
00324     int xOff, int yOff, int width, int height,  Color goodColor, Color badColor,
00325     int dnaSize, int dnaOffset, struct cdaAli *cda, char repeatChar)
00326 /* Draw alignment on a horizontal track of picture. */
00327 {
00328 struct cdaBlock *block = cda->blocks;
00329 int count = cda->blockCount;
00330 int blockIx;
00331 int scaledHeight = roundingScale(height, dnaSize, width);
00332 MgFont *font = NULL;
00333 int repeatCharWidth = 0;
00334 
00335 if (repeatChar)
00336     {
00337     font = mgSmallFont();
00338     repeatCharWidth = mgFontCharWidth(font, repeatChar);
00339     }
00340 
00341 for (blockIx = 0; blockIx < count; ++blockIx)
00342     {
00343     int x[4];
00344     int b[4];
00345     Color c[3];
00346     int quarterWid;
00347     int i;
00348     int w;
00349 
00350     b[0] = block->hStart;
00351     b[3] = block->hEnd;
00352     quarterWid = (b[3]-b[0]+2)>>2;
00353     if (quarterWid > scaledHeight)
00354         quarterWid = scaledHeight;
00355     b[1] = b[0] + quarterWid;
00356     b[2] = b[3] - quarterWid;
00357 
00358     c[0] = ((block->startGood >= 4 && (blockIx == 0 || block[-1].nEnd == block[0].nStart)) 
00359         ? goodColor : badColor);
00360     c[1] = ((block->midScore > 229) ? goodColor : badColor);
00361     c[2] = ((block->endGood >= 4 && (blockIx == count-1 || block[0].nEnd == block[1].nStart))
00362         ? goodColor : badColor);
00363 
00364     for (i=0; i<4; ++i)
00365         x[i] = roundingScale(b[i]-dnaOffset, width, dnaSize) + xOff;
00366     for (i=0; i<3; ++i)
00367         {
00368         if ((w = x[i+1] - x[i]) > 0)
00369             mgDrawBox(mg, x[i], yOff, w, height, c[i]);
00370         }
00371     
00372     if (repeatChar)
00373         {
00374         char buf[256];
00375         int charCount;
00376         w = x[3] - x[0];
00377         charCount = w/repeatCharWidth;
00378         if (charCount >= sizeof(buf))
00379             charCount = sizeof(buf)-1;
00380         memset(buf, repeatChar, charCount);
00381         buf[charCount] = 0;
00382         mgTextCentered(mg, x[0], yOff, w, height, MG_WHITE, font, buf);
00383         }
00384     ++block;
00385     }
00386 }
00387 
00388 
00389 static UBYTE leftGood(struct ffAli *ali)
00390 {
00391 DNA *n = ali->nStart;
00392 DNA *h = ali->hStart;
00393 int size = ali->nEnd - ali->nStart;
00394 int matchSize = 0;
00395 
00396 while (--size >= 0)
00397     {
00398     if (*n++ != *h++)
00399         break;
00400     ++matchSize;
00401     }
00402 if (matchSize > 255)
00403     matchSize = 255;
00404 return (UBYTE)matchSize;
00405 }
00406 
00407 static UBYTE rightGood(struct ffAli *ali)
00408 {
00409 DNA *n = ali->nEnd;
00410 DNA *h = ali->hEnd;
00411 int size = ali->nEnd - ali->nStart;
00412 int matchSize = 0;
00413 
00414 while (--size >= 0)
00415     {
00416     if (*--n != *--h)
00417         break;
00418     ++matchSize;
00419     }
00420 if (matchSize > 255)
00421     matchSize = 255;
00422 return (UBYTE)matchSize;
00423 }
00424 
00425 
00426 static int countAlis(struct ffAli *ali)
00427 {
00428 int count = 0;
00429 while (ali != NULL)
00430     {
00431     ++count;
00432     ali = ali->right;
00433     }
00434 return count;
00435 }
00436 
00437 struct cdaAli *cdaAliFromFfAli(struct ffAli *aliList, 
00438     DNA *needle, int needleSize, DNA *hay, int haySize, boolean isRc)
00439 /* Convert from ffAli to cdaAli format. */
00440 {
00441 int count = countAlis(aliList);
00442 struct cdaAli *cda;
00443 struct cdaBlock *blocks;
00444 struct ffAli *fa;
00445 int score;
00446 int bases;
00447 
00448 if (isRc)
00449     reverseComplement(needle, needleSize);
00450 AllocVar(cda);
00451 cda->baseCount = needleSize;
00452 cda->strand = (isRc ? '-' : '+');
00453 cda->direction = '+';   /* Actually we don't know. */
00454 cda->orientation = (isRc ? -1 : 1);
00455 cda->blockCount = count;
00456 cda->blocks = blocks = needMem(count * sizeof(blocks[0]));
00457 
00458 for (fa = aliList; fa != NULL; fa = fa->right)
00459     {
00460     blocks->nStart = fa->nStart - needle;
00461     blocks->nEnd = fa->nEnd - needle;
00462     blocks->hStart = fa->hStart - hay;
00463     blocks->hEnd  = fa->hEnd - hay;
00464     blocks->startGood = leftGood(fa);
00465     blocks->endGood = rightGood(fa);
00466     bases = fa->nEnd - fa->nStart;
00467     score = dnaScoreMatch(fa->nStart, fa->hStart, bases);
00468     blocks->midScore = roundingScale(255, score, bases);
00469     ++blocks;
00470     }
00471 cdaFixChromStartEnd(cda);
00472 if (isRc)
00473     {
00474     reverseComplement(needle, needleSize);
00475     }
00476 return cda;
00477 }
00478 

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2