00001
00002
00003
00004
00005
00006
00007 #include "common.h"
00008 #include "dnautil.h"
00009 #include "memgfx.h"
00010 #include "fuzzyFind.h"
00011 #include "htmshell.h"
00012 #include "cda.h"
00013 #include "seqOut.h"
00014
/* Revision-control identification string ("$Id$" keyword expansion) for this
 * file.  It is not referenced by any code visible in this file. */
static char const rcsid[] = "$Id: fuzzyShow.c,v 1.23 2007/03/27 23:21:25 angie Exp $";
00016
00017 static void ffShNeedle(FILE *f, DNA *needle, int needleSize,
00018 int needleNumOffset, char *colorFlags,
00019 struct ffAli *aliList, boolean upcMatch,
00020 int cdsS, int cdsE,
00021 boolean accentRange, int accentStart, int accentEnd)
00022
00023 {
00024 struct cfm *cfm = cfmNew(10, 50, TRUE, FALSE, f, needleNumOffset);
00025 char *n = cloneMem(needle, needleSize);
00026 char *accentFlags = needMem(needleSize);
00027 struct ffAli *leftAli = aliList;
00028 struct ffAli *ali;
00029 long i;
00030
00031 zeroBytes(colorFlags, needleSize);
00032 zeroBytes(accentFlags, needleSize);
00033 fprintf(f, "<TT><PRE>\n");
00034 if (aliList != NULL)
00035 {
00036 for (leftAli = aliList; leftAli->left != NULL; leftAli = leftAli->left)
00037 ;
00038 }
00039 for (ali = leftAli; ali != NULL; ali = ali->right)
00040 {
00041 boolean utr = FALSE;
00042 int off = ali->nStart-needle;
00043 int count = ali->nEnd - ali->nStart;
00044 if ((cdsE > 0) && ((cdsS-off-1) > 0))
00045 utr = TRUE;
00046 for (i=0; i<count; ++i)
00047 {
00048 if (!utr && (i > (cdsE-off-1)) && (cdsE > 0))
00049 utr = TRUE;
00050 if (utr && (i == (cdsS-off)))
00051 utr = FALSE;
00052 if (toupper(ali->hStart[i]) == toupper(ali->nStart[i]))
00053 {
00054 if (utr)
00055 colorFlags[off+i] = ((i == 0 || i == count-1) ? socOrange : socRed);
00056 else
00057 colorFlags[off+i] = ((i == 0 || i == count-1) ? socBrightBlue : socBlue);
00058 if (upcMatch)
00059 n[off+i] = toupper(n[off+i]);
00060 }
00061 if (accentRange)
00062 {
00063 if (off+i >= accentStart && off+i < accentEnd)
00064 accentFlags[off+i] = TRUE;
00065 }
00066 }
00067 }
00068 for (i=0; i<needleSize; ++i)
00069 {
00070 if (accentRange && i == accentStart)
00071 fprintf(f, "<A NAME=cDNAStart></A>");
00072 cfmOutExt(cfm, n[i], seqOutColorLookup[(int)colorFlags[i]],
00073 accentFlags[i], accentFlags[i], FALSE);
00074 }
00075 cfmFree(&cfm);
00076 freeMem(n);
00077 freeMem(accentFlags);
00078 fprintf(f, "</TT></PRE>\n");
00079 htmHorizontalLine(f);
00080 }
00081
00082 int ffShAliPart(FILE *f, struct ffAli *aliList,
00083 char *needleName, DNA *needle, int needleSize, int needleNumOffset,
00084 char *haystackName, DNA *haystack, int haySize, int hayNumOffset,
00085 int blockMaxGap, boolean rcNeedle, boolean rcHaystack,
00086 boolean showJumpTable,
00087 boolean showNeedle, boolean showHaystack,
00088 boolean showSideBySide, boolean upcMatch,
00089 int cdsS, int cdsE, int hayPartS, int hayPartE)
00090
00091
00092
00093
00094
00095 {
00096 long i;
00097 struct ffAli *ali;
00098 struct ffAli *lastAli;
00099 struct ffAli *leftAli = aliList;
00100 struct ffAli *rightAli = aliList;
00101 int charsInLine;
00102 struct baf baf;
00103 int maxSize = (needleSize > haySize ? needleSize : haySize);
00104 char *colorFlags = needMem(maxSize);
00105 int anchorCount = 0;
00106 boolean restrictToWindow = FALSE;
00107 int hayOffStart = 0, hayOffEnd = haySize;
00108 int hayPaddedOffStart = 0, hayPaddedOffEnd = haySize;
00109 int hayExtremity = rcHaystack ? (hayNumOffset + haySize) : hayNumOffset;
00110 int nPartS=0, nPartE=0;
00111
00112 if (aliList != NULL)
00113 {
00114 while (leftAli->left != NULL) leftAli = leftAli->left;
00115 while (rightAli->right != NULL) rightAli = rightAli->right;
00116 }
00117
00118
00119
00120 if (hayPartS > (hayNumOffset + (leftAli->hStart - haystack)) ||
00121 (hayPartE > 0 && hayPartE < (hayNumOffset + (rightAli->hEnd - haystack))))
00122 {
00123 DNA *haystackPartS;
00124 DNA *haystackPartE;
00125 restrictToWindow = TRUE;
00126 if (rcHaystack)
00127 {
00128 haystackPartS = haystack + (haySize - (hayPartE - hayNumOffset));
00129 haystackPartE = haystack + (haySize - (hayPartS - hayNumOffset));
00130 }
00131 else
00132 {
00133 haystackPartS = haystack + hayPartS - hayNumOffset;
00134 haystackPartE = haystack + hayPartE - hayNumOffset;
00135 }
00136 boolean foundStart = FALSE;
00137 hayOffStart = haystackPartS - haystack;
00138 hayOffEnd = haystackPartE - haystack;
00139 for (ali = leftAli; ali != NULL; ali = ali->right)
00140 {
00141 if (haystackPartS < ali->hEnd && !foundStart)
00142 {
00143 int offset = haystackPartS - ali->hStart;
00144 if (offset < 0)
00145 offset = 0;
00146 nPartS = offset + ali->nStart - needle;
00147 hayOffStart = offset + ali->hStart - haystack;
00148 foundStart = TRUE;
00149 }
00150 if (haystackPartE > ali->hStart)
00151 {
00152 if (haystackPartE > ali->hEnd)
00153 {
00154 nPartE = ali->nEnd - needle;
00155 hayOffEnd = ali->hEnd - haystack;
00156 }
00157 else
00158 {
00159 nPartE = haystackPartE - ali->hStart + ali->nStart - needle;
00160 hayOffEnd = haystackPartE - haystack;
00161 }
00162 }
00163 }
00164 hayPaddedOffStart = max(0, (hayOffStart - 100));
00165 hayPaddedOffEnd = min(haySize, (hayOffEnd + 100));
00166 if (rcHaystack)
00167 hayExtremity = hayNumOffset + haySize - hayPaddedOffStart;
00168 else
00169 hayExtremity = hayNumOffset + hayPaddedOffEnd;
00170 }
00171
00172 if (showJumpTable)
00173 {
00174 fputs("<CENTER><P><TABLE BORDER=1 WIDTH=\"97%\"><TR>", f);
00175 fputs("<TD WIDTH=\"23%\"><P ALIGN=CENTER><A HREF=\"#cDNA\">cDNA Sequence</A></TD>", f);
00176 if (restrictToWindow)
00177 fputs("<TD WIDTH=\"23%\"><P ALIGN=CENTER><A HREF=\"#cDNAStart\">cDNA Sequence in window</A></TD>", f);
00178 fputs("<TD WIDTH=\"27%\"><P ALIGN=\"CENTER\"><A HREF=\"#genomic\">Genomic Sequence</A></TD>", f);
00179 fputs("<TD WIDTH=\"29%\"><P ALIGN=\"CENTER\"><A HREF=\"#1\">cDNA in Genomic</A></TD>", f);
00180 fputs("<TD WIDTH=\"21%\"><P ALIGN=\"CENTER\"><A HREF=\"#ali\">Side by Side</A></TD>", f);
00181 fputs("</TR></TABLE>\n", f);
00182 }
00183 if (cdsE > 0)
00184 {
00185 fprintf(f, "Matching bases in coding regions of cDNA and genomic sequences are colored blue%s. ",
00186 (upcMatch ? " and capitalized" : ""));
00187 fprintf(f, "Matching bases in UTR regions of cDNA and genomic sequences are colored red%s. ",
00188 (upcMatch ? " and capitalized" : ""));
00189 fputs("Light blue (coding) or orange (UTR) bases mark the boundaries of gaps in either sequence "
00190 "(often splice sites).\n", f);
00191 }
00192 else
00193 {
00194 fprintf(f, "Matching bases in cDNA and genomic sequences are colored blue%s. ",
00195 (upcMatch ? " and capitalized" : ""));
00196 fputs("Light blue bases mark the boundaries of gaps in either sequence "
00197 "(often splice sites).\n", f);
00198 }
00199 if (showNeedle && restrictToWindow)
00200 fputs("Bases that were in the selected browser region are shown in bold "
00201 "and underlined, "
00202 "and only the alignment for these bases is displayed in the "
00203 "Genomic and Side by Side sections.\n", f);
00204
00205 if (showJumpTable)
00206 fputs("</P></CENTER>\n", f);
00207 htmHorizontalLine(f);
00208
00209 fprintf(f, "<H4><A NAME=cDNA></A>cDNA %s%s</H4>\n", needleName, (rcNeedle ? " (reverse complemented)" : ""));
00210
00211
00212 if (rcHaystack)
00213 hayNumOffset += haySize;
00214
00215 if (rcNeedle)
00216 reverseComplement(needle, needleSize);
00217
00218 if (showNeedle)
00219 {
00220 ffShNeedle(f, needle, needleSize, needleNumOffset, colorFlags,
00221 aliList, upcMatch, cdsS, cdsE,
00222 restrictToWindow, nPartS, nPartE);
00223 }
00224
00225 if (showHaystack)
00226 {
00227 struct cfm *cfm = cfmNew(10, 50, TRUE, rcHaystack, f, hayExtremity);
00228 char *h = cloneMem(haystack, haySize);
00229 char *accentFlags = needMem(haySize);
00230 zeroBytes(accentFlags, haySize);
00231 fprintf(f, "<H4><A NAME=genomic></A>Genomic %s %s:</H4>\n",
00232 haystackName,
00233 (rcHaystack ? "(reverse strand)" : ""));
00234 fprintf(f, "<TT><PRE>\n");
00235 zeroBytes(colorFlags, haySize);
00236 for (ali = leftAli; ali != NULL; ali = ali->right)
00237 {
00238 boolean utr = FALSE;
00239 int i;
00240 int off = ali->hStart-haystack;
00241 int count = ali->hEnd - ali->hStart;
00242 int offn = ali->nStart-needle;
00243 if ((cdsE > 0) && ((cdsS-offn-1) > 0))
00244 utr = TRUE;
00245 for (i=0; i<count; ++i)
00246 {
00247 if (!utr && (i > (cdsE-offn-1)) && (cdsE > 0))
00248 utr = TRUE;
00249 if (utr && (i == (cdsS-offn)))
00250 utr = FALSE;
00251 if (toupper(ali->hStart[i]) == toupper(ali->nStart[i]))
00252 {
00253 if (utr)
00254 colorFlags[off+i] = ((i == 0 || i == count-1) ? socOrange : socRed);
00255 else
00256 colorFlags[off+i] = ((i == 0 || i == count-1) ? socBrightBlue : socBlue);
00257 if (upcMatch)
00258 h[off+i] = toupper(h[off+i]);
00259 }
00260 if (restrictToWindow && off+i >= hayOffStart && off+i < hayOffEnd)
00261 accentFlags[off+i] = TRUE;
00262 }
00263 }
00264 ali = leftAli;
00265 lastAli = NULL;
00266 while (ali && (ali->hEnd - haystack) <= hayPaddedOffStart)
00267 ali = ali->right;
00268 for (i = hayPaddedOffStart; i < hayPaddedOffEnd; ++i)
00269 {
00270
00271
00272 if (ali != NULL && i == ali->hStart - haystack)
00273 {
00274 if (lastAli == NULL || ali->hStart - lastAli->hEnd > blockMaxGap)
00275 {
00276 fprintf(f, "<A NAME=%d></A>", ++anchorCount);
00277 }
00278 lastAli = ali;
00279 ali = ali->right;
00280 }
00281 cfmOutExt(cfm, h[i], seqOutColorLookup[(int)colorFlags[i]],
00282 accentFlags[i], accentFlags[i], FALSE);
00283 }
00284 cfmFree(&cfm);
00285 freeMem(h);
00286 fprintf(f, "</TT></PRE>\n");
00287 htmHorizontalLine(f);
00288 }
00289
00290 if (showSideBySide)
00291 {
00292 fprintf(f, "<H4><A NAME=ali></A>Side by Side Alignment</H4>\n");
00293 fprintf(f, "<TT><PRE>\n");
00294 lastAli = NULL;
00295 charsInLine = 0;
00296 bafInit(&baf, needle, needleNumOffset, FALSE,
00297 haystack, hayNumOffset, rcHaystack, f, 50, FALSE);
00298 for (ali=leftAli; ali!=NULL; ali = ali->right)
00299 {
00300 boolean doBreak = TRUE;
00301 int aliLen;
00302 int i;
00303
00304 if ((ali->hEnd - haystack) <= hayOffStart ||
00305 (ali->hStart - haystack) >= hayOffEnd)
00306 continue;
00307
00308
00309 if (lastAli != NULL)
00310 {
00311 int nSkip = ali->nStart - lastAli->nEnd;
00312 int hSkip = ali->hStart - lastAli->hEnd;
00313 if (nSkip > 0 && nSkip <= blockMaxGap && hSkip == 0)
00314 {
00315 for (i=0; i<nSkip; ++i)
00316 bafOut(&baf, lastAli->nEnd[i],'.');
00317 doBreak = FALSE;
00318 }
00319 else if (hSkip > 0 && hSkip <= blockMaxGap && nSkip == 0)
00320 {
00321 for (i=0; i<hSkip; ++i)
00322 bafOut(&baf, '.', lastAli->hEnd[i]);
00323 doBreak = FALSE;
00324 }
00325 else if (hSkip == nSkip && hSkip <= blockMaxGap)
00326 {
00327 for (i=0; i<hSkip; ++i)
00328 bafOut(&baf, lastAli->nEnd[i], lastAli->hEnd[i]);
00329 doBreak = FALSE;
00330 }
00331 }
00332 else
00333 {
00334 doBreak = FALSE;
00335 }
00336 if (doBreak)
00337 bafFlushLine(&baf);
00338 int offset = max(0, (hayOffStart - (ali->hStart - haystack)));
00339 int nStart = offset + ali->nStart - needle;
00340 int hStart = offset + ali->hStart - haystack;
00341 bafSetPos(&baf, nStart, hStart);
00342 if (doBreak || lastAli == NULL)
00343 bafStartLine(&baf);
00344 aliLen = ali->nEnd - ali->nStart;
00345 for (i=0; i<aliLen; ++i)
00346 {
00347 int hayOff = i + (ali->hStart - haystack);
00348 if (hayOff < hayOffStart)
00349 continue;
00350 if (hayOff >= hayOffEnd)
00351 break;
00352 bafOut(&baf, ali->nStart[i], ali->hStart[i]);
00353 }
00354 lastAli = ali;
00355 }
00356 if (leftAli != NULL)
00357 bafFlushLine(&baf);
00358 fprintf(f, "</TT></PRE>\n");
00359 fprintf(f, "<EM>*Aligned Blocks with gaps <= %d bases are merged for "
00360 "this display when only one sequence has a gap, or when gaps in "
00361 "both sequences are of the same size.</EM>\n", blockMaxGap);
00362 }
00363 if (rcNeedle)
00364 reverseComplement(needle, needleSize);
00365 return anchorCount;
00366 }
00367
00368 int ffShAli(FILE *f, struct ffAli *aliList,
00369 char *needleName, DNA *needle, int needleSize, int needleNumOffset,
00370 char *haystackName, DNA *haystack, int haySize, int hayNumOffset,
00371 int blockMaxGap, boolean rcNeedle)
00372
00373
00374 {
00375 return ffShAliPart(f, aliList, needleName, needle, needleSize, needleNumOffset,
00376 haystackName, haystack, haySize, hayNumOffset, blockMaxGap, rcNeedle, FALSE,
00377 TRUE, TRUE, TRUE, TRUE, FALSE, 0, 0, 0, 0);
00378 }
00379
00380 void ffShowAli(struct ffAli *aliList, char *needleName, DNA *needle, int needleNumOffset,
00381 char *haystackName, DNA *haystack, int hayNumOffset, boolean rcNeedle)
00382
00383 {
00384 ffShAli(stdout, aliList, needleName, needle, strlen(needle), needleNumOffset,
00385 haystackName, haystack, strlen(haystack), hayNumOffset, 8, rcNeedle);
00386 }
#if 0
static struct cdaAli *makeBlocks(struct ffAli *aliList,
    DNA *needle, int needleSize, DNA *hay, int haySize, boolean isRc)
/* Disabled (compiled out by #if 0).  Converts the ffAli list to a cdaAli
 * with cdaAliFromFfAli, runs cdaCoalesceBlocks on it and returns it. */
{
struct cdaAli *blocks;

blocks = cdaAliFromFfAli(aliList, needle, needleSize, hay, haySize, isRc);
cdaCoalesceBlocks(blocks);
return blocks;
}
#endif