00001
00002
00003
00004
00005 #include "common.h"
00006 #include "dnaseq.h"
00007 #include "htmshell.h"
00008 #include "psl.h"
00009 #include "cda.h"
00010 #include "seqOut.h"
00011
00012 static char const rcsid[] = "$Id: pslShow.c,v 1.5 2005/10/05 19:07:43 kent Exp $";
00013
00014 static void pslShowAlignmentStranded(struct psl *psl, boolean isProt,
00015 char *qName, bioSeq *qSeq, int qStart, int qEnd,
00016 char *tName, bioSeq *tSeq, int tStart, int tEnd, FILE *f)
00017
00018 {
00019 boolean tIsRc = (psl->strand[1] == '-');
00020 boolean qIsRc = (psl->strand[0] == '-');
00021 int mulFactor = (isProt ? 3 : 1);
00022 DNA *dna = NULL;
00023 int qSize = qSeq->size;
00024 char *qLetters = cloneString(qSeq->dna);
00025 int qbafStart, qbafEnd, tbafStart, tbafEnd;
00026 int qcfmStart, qcfmEnd, tcfmStart, tcfmEnd;
00027
00028 tbafStart = psl->tStart;
00029 tbafEnd = psl->tEnd;
00030 tcfmStart = psl->tStart;
00031 tcfmEnd = psl->tEnd;
00032
00033 qbafStart = qStart;
00034 qbafEnd = qEnd;
00035 qcfmStart = qStart;
00036 qcfmEnd = qEnd;
00037
00038
00039 if (tIsRc)
00040 {
00041 int temp;
00042 reverseComplement(tSeq->dna, tSeq->size);
00043 temp = psl->tSize - tEnd;
00044 tEnd = psl->tSize - tStart;
00045 tStart = temp;
00046
00047 tbafStart = psl->tEnd;
00048 tbafEnd = psl->tStart;
00049 tcfmStart = psl->tEnd;
00050 tcfmEnd = psl->tStart;
00051 }
00052 if (qIsRc)
00053 {
00054 int temp;
00055 reverseComplement(qSeq->dna, qSeq->size);
00056 reverseComplement(qLetters, qSeq->size);
00057
00058 qcfmStart = qEnd;
00059 qcfmEnd = qStart;
00060 qbafStart = qEnd;
00061 qbafEnd = qStart;
00062
00063 temp = psl->qSize - qEnd;
00064 qEnd = psl->qSize - qStart;
00065 qStart = temp;
00066 }
00067 dna = cloneString(tSeq->dna);
00068
00069 if (qName == NULL)
00070 qName = psl->qName;
00071 if (tName == NULL)
00072 tName = psl->tName;
00073
00074
00075 fputs("Matching bases are colored blue and capitalized. "
00076 "Light blue bases mark the boundaries of gaps in either sequence.\n", f);
00077
00078 fprintf(f, "<H4><A NAME=cDNA></A>%s%s</H4>\n", qName, (qIsRc ? " (reverse complemented)" : ""));
00079 fprintf(f, "<PRE><TT>");
00080 tolowers(qLetters);
00081
00082
00083 {
00084 struct cfm *cfm;
00085 char *colorFlags = needMem(qSeq->size);
00086 int i,j;
00087
00088 for (i=0; i<psl->blockCount; ++i)
00089 {
00090 int qs = psl->qStarts[i] - qStart;
00091 int ts = psl->tStarts[i] - tStart;
00092 int sz = psl->blockSizes[i]-1;
00093 colorFlags[qs] = socBrightBlue;
00094 qLetters[qs] = toupper(qLetters[qs]);
00095 colorFlags[qs+sz] = socBrightBlue;
00096 qLetters[qs+sz] = toupper(qLetters[qs+sz]);
00097 if (isProt)
00098 {
00099 for (j=1; j<sz; ++j)
00100 {
00101 AA aa = qSeq->dna[qs+j];
00102 DNA *codon = &tSeq->dna[ts + 3*j];
00103 AA trans = lookupCodon(codon);
00104 if (trans != 'X' && trans == aa)
00105 {
00106 colorFlags[qs+j] = socBlue;
00107 qLetters[qs+j] = toupper(qLetters[qs+j]);
00108 }
00109 }
00110 }
00111 else
00112 {
00113 for (j=1; j<sz; ++j)
00114 {
00115 if (qSeq->dna[qs+j] == tSeq->dna[ts+j])
00116 {
00117 colorFlags[qs+j] = socBlue;
00118 qLetters[qs+j] = toupper(qLetters[qs+j]);
00119 }
00120 }
00121 }
00122 }
00123 cfm = cfmNew(10, 60, TRUE, qIsRc, f, qcfmStart);
00124 for (i=0; i<qSize; ++i)
00125 cfmOut(cfm, qLetters[i], seqOutColorLookup[(int)colorFlags[i]]);
00126 cfmFree(&cfm);
00127 freez(&colorFlags);
00128 htmHorizontalLine(f);
00129 }
00130 fprintf(f, "</TT></PRE>\n");
00131 fprintf(f, "<H4><A NAME=genomic></A>%s %s:</H4>\n",
00132 tName, (tIsRc ? "(reverse strand)" : ""));
00133 fprintf(f, "<PRE><TT>");
00134
00135
00136 {
00137 struct cfm *cfm;
00138 char *colorFlags = needMem(tSeq->size);
00139 int i,j;
00140 int curBlock = 0;
00141
00142 for (i=0; i<psl->blockCount; ++i)
00143 {
00144 int qs = psl->qStarts[i] - qStart;
00145 int ts = psl->tStarts[i] - tStart;
00146 int sz = psl->blockSizes[i];
00147 if (isProt)
00148 {
00149 for (j=0; j<sz; ++j)
00150 {
00151 AA aa = qSeq->dna[qs+j];
00152 int codonStart = ts + 3*j;
00153 DNA *codon = &tSeq->dna[codonStart];
00154 AA trans = lookupCodon(codon);
00155 if (trans != 'X' && trans == aa)
00156 {
00157 colorFlags[codonStart] = socBlue;
00158 colorFlags[codonStart+1] = socBlue;
00159 colorFlags[codonStart+2] = socBlue;
00160 toUpperN(dna+codonStart, 3);
00161 }
00162 }
00163 }
00164 else
00165 {
00166 for (j=0; j<sz; ++j)
00167 {
00168 if (qSeq->dna[qs+j] == tSeq->dna[ts+j])
00169 {
00170 colorFlags[ts+j] = socBlue;
00171 dna[ts+j] = toupper(dna[ts+j]);
00172 }
00173 }
00174 }
00175 colorFlags[ts] = socBrightBlue;
00176 colorFlags[ts+sz*mulFactor-1] = socBrightBlue;
00177 }
00178
00179 cfm = cfmNew(10, 60, TRUE, tIsRc, f, tcfmStart);
00180
00181 for (i=0; i<tSeq->size; ++i)
00182 {
00183
00184
00185 if (curBlock < psl->blockCount && psl->tStarts[curBlock] == (i + tStart) )
00186 {
00187 fprintf(f, "<A NAME=%d></A>", ++curBlock);
00188
00189 while (curBlock < psl->blockCount &&
00190 psl->tStarts[curBlock] <= tStart + i)
00191 curBlock++;
00192 }
00193 cfmOut(cfm, dna[i], seqOutColorLookup[(int)colorFlags[i]]);
00194 }
00195 cfmFree(&cfm);
00196 freez(&colorFlags);
00197 htmHorizontalLine(f);
00198 }
00199
00200
00201 fprintf(f, "</TT></PRE>\n");
00202 fprintf(f, "<H4><A NAME=ali></A>Side by Side Alignment*</H4>\n");
00203 fprintf(f, "<PRE><TT>");
00204 {
00205 struct baf baf;
00206 int i,j;
00207
00208 bafInit(&baf, qSeq->dna, qbafStart, qIsRc,
00209 tSeq->dna, tbafStart, tIsRc, f, 60, isProt);
00210
00211 if (isProt)
00212 {
00213 for (i=0; i<psl->blockCount; ++i)
00214 {
00215 int qs = psl->qStarts[i] - qStart;
00216 int ts = psl->tStarts[i] - tStart;
00217 int sz = psl->blockSizes[i];
00218
00219 bafSetPos(&baf, qs, ts);
00220 bafStartLine(&baf);
00221 for (j=0; j<sz; ++j)
00222 {
00223 AA aa = qSeq->dna[qs+j];
00224 int codonStart = ts + 3*j;
00225 DNA *codon = &tSeq->dna[codonStart];
00226 bafOut(&baf, ' ', codon[0]);
00227 bafOut(&baf, aa, codon[1]);
00228 bafOut(&baf, ' ', codon[2]);
00229 }
00230 bafFlushLine(&baf);
00231 }
00232 fprintf( f,
00233 "<I>*When the translated amino acid in the genomic sequence differs from the \n"
00234 "corresponding amino acid in the protein, the coloring indicates the\n"
00235 "similarity of the two amino acids. Similar amino acids are green, \n"
00236 "dissimilar amino acids are red. The sign of the corresponding entry in\n"
00237 "the BLOSUM 62 matrix is used as the basis of this coloring.</I>\n");
00238 }
00239 else
00240 {
00241 int lastQe = psl->qStarts[0] - qStart;
00242 int lastTe = psl->tStarts[0] - tStart;
00243 int maxSkip = 20;
00244 bafSetPos(&baf, lastQe, lastTe);
00245 bafStartLine(&baf);
00246 for (i=0; i<psl->blockCount; ++i)
00247 {
00248 int qs = psl->qStarts[i] - qStart;
00249 int ts = psl->tStarts[i] - tStart;
00250 int sz = psl->blockSizes[i];
00251 boolean doBreak = TRUE;
00252 int qSkip = qs - lastQe;
00253 int tSkip = ts - lastTe;
00254
00255 if (qSkip >= 0 && qSkip <= maxSkip && tSkip == 0)
00256 {
00257 for (j=0; j<qSkip; ++j)
00258 bafOut(&baf, qSeq->dna[lastQe+j], '-');
00259 doBreak = FALSE;
00260 }
00261 else if (tSkip > 0 && tSkip <= maxSkip && qSkip == 0)
00262 {
00263 for (j=0; j<tSkip; ++j)
00264 bafOut(&baf, '-', tSeq->dna[lastTe+j]);
00265 doBreak = FALSE;
00266 }
00267 if (doBreak)
00268 {
00269 bafFlushLine(&baf);
00270 bafSetPos(&baf, qs, ts);
00271 bafStartLine(&baf);
00272 }
00273 for (j=0; j<sz; ++j)
00274 bafOut(&baf, qSeq->dna[qs+j], tSeq->dna[ts+j]);
00275 lastQe = qs + sz;
00276 lastTe = ts + sz;
00277 }
00278 bafFlushLine(&baf);
00279
00280 fprintf( f, "<I>*Aligned Blocks with gaps <= %d bases are merged for this display</I>\n", maxSkip);
00281 }
00282 }
00283 fprintf(f, "</TT></PRE>");
00284 if (qIsRc)
00285 reverseComplement(qSeq->dna, qSeq->size);
00286 if (tIsRc)
00287 reverseComplement(tSeq->dna, tSeq->size);
00288 freeMem(dna);
00289 freeMem(qLetters);
00290 }
00291
00292 int pslShowAlignment(struct psl *psl, boolean isProt,
00293 char *qName, bioSeq *qSeq, int qStart, int qEnd,
00294 char *tName, bioSeq *tSeq, int tStart, int tEnd, FILE *f)
00295
00296 {
00297
00298
00299 char origStrand[2];
00300 boolean needsSwap = (psl->strand[0] == '-' && psl->strand[1] == 0);
00301 if (needsSwap)
00302 {
00303 memcpy(origStrand, psl->strand, 2);
00304 pslRcBoth(psl);
00305 psl->strand[0] = '+';
00306 psl->strand[1] = '-';
00307 }
00308 pslShowAlignmentStranded(psl, isProt, qName, qSeq, qStart, qEnd,
00309 tName, tSeq, tStart, tEnd, f);
00310 if (needsSwap)
00311 {
00312 pslRcBoth(psl);
00313 memcpy(psl->strand, origStrand, 2);
00314 }
00315 return psl->blockCount;
00316 }
00317