lib/fuzzyShow.c

Go to the documentation of this file.
00001 /* fuzzyShow - routines to show ffAli alignments in text
00002  * or html. 
00003  *
00004  * This file is copyright 2002 Jim Kent, but license is hereby
00005  * granted for all use - public, private or commercial. */
00006 
00007 #include "common.h"
00008 #include "dnautil.h"
00009 #include "memgfx.h"
00010 #include "fuzzyFind.h"
00011 #include "htmshell.h"
00012 #include "cda.h"
00013 #include "seqOut.h"
00014 
00015 static char const rcsid[] = "$Id: fuzzyShow.c,v 1.23 2007/03/27 23:21:25 angie Exp $";
00016 
00017 static void ffShNeedle(FILE *f, DNA *needle, int needleSize,
00018                        int needleNumOffset, char *colorFlags,
00019                        struct ffAli *aliList, boolean upcMatch,
00020                        int cdsS, int cdsE,
00021                        boolean accentRange, int accentStart, int accentEnd)
00022 /* Display the needle sequence with HTML highlighting. */
00023 {
00024 struct cfm *cfm = cfmNew(10, 50, TRUE, FALSE, f, needleNumOffset);
00025 char *n = cloneMem(needle, needleSize);
00026 char *accentFlags = needMem(needleSize);
00027 struct ffAli *leftAli = aliList;
00028 struct ffAli *ali;
00029 long i;
00030 
00031 zeroBytes(colorFlags, needleSize);
00032 zeroBytes(accentFlags, needleSize);
00033 fprintf(f, "<TT><PRE>\n");
00034 if (aliList != NULL)
00035     {
00036     for (leftAli = aliList; leftAli->left != NULL; leftAli = leftAli->left)
00037         ;
00038     }
00039 for (ali = leftAli; ali != NULL; ali = ali->right)
00040     {
00041     boolean utr = FALSE;
00042     int off = ali->nStart-needle;
00043     int count = ali->nEnd - ali->nStart;
00044     if ((cdsE > 0) && ((cdsS-off-1) > 0)) 
00045         utr = TRUE;
00046     for (i=0; i<count; ++i)
00047         {
00048         if (!utr && (i > (cdsE-off-1)) && (cdsE > 0))
00049             utr = TRUE;
00050         if (utr && (i == (cdsS-off)))
00051             utr = FALSE;
00052         if (toupper(ali->hStart[i]) == toupper(ali->nStart[i]))
00053             {
00054             if (utr)
00055                 colorFlags[off+i] = ((i == 0 || i == count-1) ? socOrange : socRed);
00056             else
00057                 colorFlags[off+i] = ((i == 0 || i == count-1) ? socBrightBlue : socBlue);
00058             if (upcMatch)
00059                 n[off+i] = toupper(n[off+i]);
00060             }
00061         if (accentRange)
00062             {
00063             if (off+i >= accentStart && off+i < accentEnd)
00064                 accentFlags[off+i] = TRUE;
00065             }
00066         }
00067     }
00068 for (i=0; i<needleSize; ++i)
00069     {
00070     if (accentRange && i == accentStart)
00071         fprintf(f, "<A NAME=cDNAStart></A>");
00072     cfmOutExt(cfm, n[i], seqOutColorLookup[(int)colorFlags[i]],
00073               accentFlags[i], accentFlags[i], FALSE);
00074     }
00075 cfmFree(&cfm);
00076 freeMem(n);
00077 freeMem(accentFlags);
00078 fprintf(f, "</TT></PRE>\n");
00079 htmHorizontalLine(f);
00080 }
00081 
00082 int ffShAliPart(FILE *f, struct ffAli *aliList, 
00083     char *needleName, DNA *needle, int needleSize, int needleNumOffset,
00084     char *haystackName, DNA *haystack, int haySize, int hayNumOffset,
00085     int blockMaxGap, boolean rcNeedle, boolean rcHaystack,
00086     boolean showJumpTable, 
00087     boolean showNeedle, boolean showHaystack,
00088     boolean showSideBySide, boolean upcMatch,
00089     int cdsS, int cdsE, int hayPartS, int hayPartE)
00090 /* Display parts of alignment on html page.  If hayPartS..hayPartE is a 
00091  * smaller subrange of the alignment, highlight that part of the alignment 
00092  * in both needle and haystack with underline & bold, and show only that 
00093  * part of the haystack (plus padding).  Returns number of blocks (after
00094  * merging blocks separated by blockMaxGap or less). */
00095 {
00096 long i;
00097 struct ffAli *ali;
00098 struct ffAli *lastAli;
00099 struct ffAli *leftAli = aliList;
00100 struct ffAli *rightAli = aliList;
00101 int charsInLine;
00102 struct baf baf;
00103 int maxSize = (needleSize > haySize ? needleSize : haySize);
00104 char *colorFlags = needMem(maxSize);
00105 int anchorCount = 0;
00106 boolean restrictToWindow = FALSE;
00107 int hayOffStart = 0, hayOffEnd = haySize;
00108 int hayPaddedOffStart = 0, hayPaddedOffEnd = haySize;
00109 int hayExtremity = rcHaystack ? (hayNumOffset + haySize) : hayNumOffset;
00110 int nPartS=0, nPartE=0;
00111 
00112 if (aliList != NULL)
00113     {
00114     while (leftAli->left != NULL) leftAli = leftAli->left;
00115     while (rightAli->right != NULL) rightAli = rightAli->right;
00116     }
00117 
00118 /* If we are only showing part of the alignment, translate haystack window
00119  * coords to needle window coords and haystack-offset window coords: */
00120 if (hayPartS > (hayNumOffset + (leftAli->hStart - haystack)) ||
00121     (hayPartE > 0 && hayPartE < (hayNumOffset + (rightAli->hEnd - haystack))))
00122     {
00123     DNA *haystackPartS;
00124     DNA *haystackPartE;
00125     restrictToWindow = TRUE;
00126     if (rcHaystack)
00127         {
00128         haystackPartS = haystack + (haySize - (hayPartE - hayNumOffset));
00129         haystackPartE = haystack + (haySize - (hayPartS - hayNumOffset));
00130         }
00131     else
00132         {
00133         haystackPartS = haystack + hayPartS - hayNumOffset;
00134         haystackPartE = haystack + hayPartE - hayNumOffset;
00135         }
00136     boolean foundStart = FALSE;
00137     hayOffStart = haystackPartS - haystack;
00138     hayOffEnd = haystackPartE - haystack;
00139     for (ali = leftAli;  ali != NULL;  ali = ali->right)
00140         {
00141         if (haystackPartS < ali->hEnd && !foundStart)
00142             {
00143             int offset = haystackPartS - ali->hStart;
00144             if (offset < 0)
00145                 offset = 0;
00146             nPartS = offset + ali->nStart - needle;
00147             hayOffStart = offset + ali->hStart - haystack;
00148             foundStart = TRUE;
00149             }
00150         if (haystackPartE > ali->hStart)
00151             {
00152             if (haystackPartE > ali->hEnd)
00153                 {
00154                 nPartE = ali->nEnd - needle;
00155                 hayOffEnd = ali->hEnd - haystack;
00156                 }
00157             else
00158                 {
00159                 nPartE = haystackPartE - ali->hStart + ali->nStart - needle;
00160                 hayOffEnd = haystackPartE - haystack;
00161                 }
00162             }
00163         }
00164     hayPaddedOffStart = max(0, (hayOffStart - 100));
00165     hayPaddedOffEnd = min(haySize, (hayOffEnd + 100));
00166     if (rcHaystack)
00167         hayExtremity = hayNumOffset + haySize - hayPaddedOffStart;
00168     else
00169         hayExtremity = hayNumOffset + hayPaddedOffEnd;
00170     }
00171 
00172 if (showJumpTable)
00173     {
00174     fputs("<CENTER><P><TABLE BORDER=1 WIDTH=\"97%\"><TR>", f);
00175     fputs("<TD WIDTH=\"23%\"><P ALIGN=CENTER><A HREF=\"#cDNA\">cDNA Sequence</A></TD>", f);
00176     if (restrictToWindow)
00177         fputs("<TD WIDTH=\"23%\"><P ALIGN=CENTER><A HREF=\"#cDNAStart\">cDNA Sequence in window</A></TD>", f);
00178     fputs("<TD WIDTH=\"27%\"><P ALIGN=\"CENTER\"><A HREF=\"#genomic\">Genomic Sequence</A></TD>", f);
00179     fputs("<TD WIDTH=\"29%\"><P ALIGN=\"CENTER\"><A HREF=\"#1\">cDNA in Genomic</A></TD>", f);
00180     fputs("<TD WIDTH=\"21%\"><P ALIGN=\"CENTER\"><A HREF=\"#ali\">Side by Side</A></TD>", f);
00181     fputs("</TR></TABLE>\n", f);
00182     }
00183 if (cdsE > 0) 
00184     {
00185     fprintf(f, "Matching bases in coding regions of cDNA and genomic sequences are colored blue%s. ", 
00186             (upcMatch ? " and capitalized" : ""));
00187     fprintf(f, "Matching bases in UTR regions of cDNA and genomic sequences are colored red%s. ", 
00188             (upcMatch ? " and capitalized" : ""));
00189     fputs("Light blue (coding) or orange (UTR) bases mark the boundaries of gaps in either sequence "
00190           "(often splice sites).\n", f);
00191     } 
00192 else 
00193     {
00194     fprintf(f, "Matching bases in cDNA and genomic sequences are colored blue%s. ", 
00195             (upcMatch ? " and capitalized" : ""));
00196     fputs("Light blue bases mark the boundaries of gaps in either sequence "
00197           "(often splice sites).\n", f);
00198     } 
00199 if (showNeedle && restrictToWindow)
00200     fputs("Bases that were in the selected browser region are shown in bold "
00201           "and underlined, "
00202           "and only the alignment for these bases is displayed in the "
00203           "Genomic and Side by Side sections.\n", f);
00204 
00205 if (showJumpTable)
00206     fputs("</P></CENTER>\n", f);
00207 htmHorizontalLine(f);
00208 
00209 fprintf(f, "<H4><A NAME=cDNA></A>cDNA %s%s</H4>\n", needleName, (rcNeedle ? " (reverse complemented)" : ""));
00210 
00211 /* NOTE: if rcHaystack, hayNumOffset changes here into the end, not start! */
00212 if (rcHaystack) 
00213     hayNumOffset += haySize;
00214 
00215 if (rcNeedle)
00216     reverseComplement(needle, needleSize);
00217 
00218 if (showNeedle)
00219     {
00220     ffShNeedle(f, needle, needleSize, needleNumOffset, colorFlags,
00221                aliList, upcMatch, cdsS, cdsE,
00222                restrictToWindow, nPartS, nPartE);
00223     }
00224 
00225 if (showHaystack)
00226     {
00227     struct cfm *cfm = cfmNew(10, 50, TRUE, rcHaystack, f, hayExtremity);
00228     char *h = cloneMem(haystack, haySize);
00229     char *accentFlags = needMem(haySize);
00230     zeroBytes(accentFlags, haySize);
00231     fprintf(f, "<H4><A NAME=genomic></A>Genomic %s %s:</H4>\n", 
00232         haystackName,
00233         (rcHaystack ? "(reverse strand)" : ""));
00234     fprintf(f, "<TT><PRE>\n");
00235     zeroBytes(colorFlags, haySize);
00236     for (ali = leftAli; ali != NULL; ali = ali->right)
00237         {
00238         boolean utr = FALSE;
00239         int i;
00240         int off = ali->hStart-haystack;
00241         int count = ali->hEnd - ali->hStart;
00242         int offn = ali->nStart-needle;
00243         if ((cdsE > 0) && ((cdsS-offn-1) > 0)) 
00244             utr = TRUE;
00245         for (i=0; i<count; ++i)
00246             {
00247             if (!utr && (i > (cdsE-offn-1)) && (cdsE > 0))
00248                 utr = TRUE;
00249             if (utr && (i == (cdsS-offn)))
00250                 utr = FALSE;
00251             if (toupper(ali->hStart[i]) == toupper(ali->nStart[i]))
00252                 {
00253                 if (utr)
00254                     colorFlags[off+i] = ((i == 0 || i == count-1) ? socOrange : socRed);
00255                 else
00256                     colorFlags[off+i] = ((i == 0 || i == count-1) ? socBrightBlue : socBlue);
00257                 if (upcMatch)
00258                     h[off+i] = toupper(h[off+i]);
00259                 }
00260             if (restrictToWindow && off+i >= hayOffStart && off+i < hayOffEnd)
00261                 accentFlags[off+i] = TRUE;
00262             }
00263         }
00264     ali = leftAli;
00265     lastAli = NULL;
00266     while (ali && (ali->hEnd - haystack) <= hayPaddedOffStart)
00267         ali = ali->right;
00268     for (i = hayPaddedOffStart; i < hayPaddedOffEnd; ++i)
00269         {
00270         /* Put down "anchor" on first match position in haystack
00271          * so user can hop here with a click on the needle. */
00272         if (ali != NULL &&  i == ali->hStart - haystack)
00273             {
00274             if (lastAli == NULL || ali->hStart - lastAli->hEnd > blockMaxGap)
00275                 {
00276                 fprintf(f, "<A NAME=%d></A>", ++anchorCount);
00277                 }
00278             lastAli = ali;
00279             ali = ali->right;
00280             }
00281         cfmOutExt(cfm, h[i], seqOutColorLookup[(int)colorFlags[i]],
00282                   accentFlags[i], accentFlags[i], FALSE);
00283         }
00284     cfmFree(&cfm);
00285     freeMem(h);
00286     fprintf(f, "</TT></PRE>\n");
00287     htmHorizontalLine(f);
00288     }
00289 
00290 if (showSideBySide)
00291     {
00292     fprintf(f, "<H4><A NAME=ali></A>Side by Side Alignment</H4>\n");
00293     fprintf(f, "<TT><PRE>\n");
00294     lastAli = NULL;
00295     charsInLine = 0;
00296     bafInit(&baf, needle, needleNumOffset, FALSE, 
00297         haystack, hayNumOffset, rcHaystack, f, 50, FALSE);
00298     for (ali=leftAli; ali!=NULL; ali = ali->right)
00299         {
00300         boolean doBreak = TRUE;
00301         int aliLen;
00302         int i;
00303 
00304         if ((ali->hEnd - haystack) <= hayOffStart ||
00305             (ali->hStart - haystack) >= hayOffEnd)
00306             continue;
00307 
00308         /* Decide whether to put in a line break and/or blank characters */
00309         if (lastAli != NULL)
00310             {
00311             int nSkip = ali->nStart - lastAli->nEnd;
00312             int hSkip = ali->hStart - lastAli->hEnd;
00313             if (nSkip > 0 && nSkip <= blockMaxGap && hSkip == 0)
00314                 {
00315                 for (i=0; i<nSkip; ++i)
00316                     bafOut(&baf, lastAli->nEnd[i],'.');
00317                 doBreak = FALSE; 
00318                 }
00319             else if (hSkip > 0 && hSkip <= blockMaxGap && nSkip == 0)
00320                 {
00321                 for (i=0; i<hSkip; ++i)
00322                     bafOut(&baf, '.', lastAli->hEnd[i]);
00323                 doBreak = FALSE;
00324                 }
00325             else if (hSkip == nSkip && hSkip <= blockMaxGap)
00326                 {
00327                 for (i=0; i<hSkip; ++i)
00328                     bafOut(&baf, lastAli->nEnd[i], lastAli->hEnd[i]);
00329                 doBreak = FALSE;
00330                 }
00331             }
00332         else
00333             {
00334             doBreak = FALSE;
00335             }
00336         if (doBreak)
00337             bafFlushLine(&baf);
00338         int offset = max(0, (hayOffStart - (ali->hStart - haystack)));
00339         int nStart = offset + ali->nStart - needle;
00340         int hStart = offset + ali->hStart - haystack;
00341         bafSetPos(&baf, nStart, hStart);
00342         if (doBreak || lastAli == NULL)
00343             bafStartLine(&baf);
00344         aliLen = ali->nEnd - ali->nStart;
00345         for (i=0; i<aliLen; ++i)
00346             {
00347             int hayOff = i + (ali->hStart - haystack);
00348             if (hayOff < hayOffStart)
00349                 continue;
00350             if (hayOff >= hayOffEnd)
00351                 break;
00352             bafOut(&baf, ali->nStart[i], ali->hStart[i]);
00353             }
00354         lastAli = ali;
00355         }
00356     if (leftAli != NULL)
00357         bafFlushLine(&baf);
00358     fprintf(f, "</TT></PRE>\n");
00359     fprintf(f, "<EM>*Aligned Blocks with gaps &lt;= %d bases are merged for "
00360             "this display when only one sequence has a gap, or when gaps in "
00361             "both sequences are of the same size.</EM>\n", blockMaxGap);
00362     }
00363 if (rcNeedle)
00364     reverseComplement(needle, needleSize);
00365 return anchorCount;
00366 }
00367 
00368 int ffShAli(FILE *f, struct ffAli *aliList, 
00369     char *needleName, DNA *needle, int needleSize, int needleNumOffset,
00370     char *haystackName, DNA *haystack, int haySize, int hayNumOffset,
00371     int blockMaxGap, boolean rcNeedle)
00372 /* Display allignment on html page.  Returns number of blocks (after
00373  * merging blocks separated by blockMaxGap or less). */
00374 {
00375 return ffShAliPart(f, aliList, needleName, needle, needleSize, needleNumOffset,
00376     haystackName, haystack, haySize, hayNumOffset, blockMaxGap, rcNeedle, FALSE,
00377     TRUE, TRUE, TRUE, TRUE, FALSE, 0, 0, 0, 0);
00378 }
00379 
00380 void ffShowAli(struct ffAli *aliList, char *needleName, DNA *needle, int needleNumOffset,
00381     char *haystackName, DNA *haystack, int hayNumOffset, boolean rcNeedle)
00382 /* Display allignment on html page. */
00383 {
00384 ffShAli(stdout, aliList, needleName, needle, strlen(needle), needleNumOffset,
00385     haystackName, haystack, strlen(haystack), hayNumOffset, 8, rcNeedle);
00386 }
#if 0 /* not used */
static struct cdaAli *makeBlocks(struct ffAli *aliList, 
    DNA *needle, int needleSize, DNA *hay, int haySize, boolean isRc)
/* Convert an ffAli list into a cdaAli with cdaAliFromFfAli, run
 * cdaCoalesceBlocks on it, and return the result.  Compiled out:
 * nothing in this file calls it. */
{
struct cdaAli *blocks;

blocks = cdaAliFromFfAli(aliList, needle, needleSize, hay, haySize, isRc);
cdaCoalesceBlocks(blocks);
return blocks;
}
#endif

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2