00001
00002
00003
00004
00005
00006
00007
00008 #include "common.h"
00009 #include "dnaseq.h"
00010 #include "htmshell.h"
00011 #include "psl.h"
00012 #include "cda.h"
00013 #include "seqOut.h"
00014
00015
00016 static void pslShowAlignmentStranded2(struct psl *psl, boolean isProt,
00017 char *qName, bioSeq *qSeq, int qStart, int qEnd,
00018 char *tName, bioSeq *tSeq, int tStart, int tEnd, int exnStarts[], int exnEnds[], int exnCnt, FILE *f)
00019
00020
00021
00022
00023 {
00024 boolean tIsRc = (psl->strand[1] == '-');
00025 boolean qIsRc = (psl->strand[0] == '-');
00026 int mulFactor = (isProt ? 3 : 1);
00027 DNA *dna = NULL;
00028 int qSize = qSeq->size;
00029 char *qLetters = cloneString(qSeq->dna);
00030 int qbafStart, qbafEnd, tbafStart, tbafEnd;
00031 int qcfmStart, qcfmEnd, tcfmStart, tcfmEnd;
00032
00033 tbafStart = psl->tStart;
00034 tbafEnd = psl->tEnd;
00035 tcfmStart = psl->tStart;
00036 tcfmEnd = psl->tEnd;
00037
00038 qbafStart = qStart;
00039 qbafEnd = qEnd;
00040 qcfmStart = qStart;
00041 qcfmEnd = qEnd;
00042
00043
00044 if (tIsRc)
00045 {
00046 int temp;
00047 reverseComplement(tSeq->dna, tSeq->size);
00048 temp = psl->tSize - tEnd;
00049 tEnd = psl->tSize - tStart;
00050 tStart = temp;
00051
00052 tbafStart = psl->tEnd;
00053 tbafEnd = psl->tStart;
00054 tcfmStart = psl->tEnd;
00055 tcfmEnd = psl->tStart;
00056 }
00057 if (qIsRc)
00058 {
00059 int temp, j;
00060 reverseComplement(qSeq->dna, qSeq->size);
00061 reverseComplement(qLetters, qSeq->size);
00062
00063 qcfmStart = qEnd;
00064 qcfmEnd = qStart;
00065 qbafStart = qEnd;
00066 qbafEnd = qStart;
00067
00068 temp = psl->qSize - qEnd;
00069 qEnd = psl->qSize - qStart;
00070 qStart = temp;
00071 for(j = 0; j < exnCnt; j++)
00072 {
00073 temp = psl->qSize - exnStarts[j];
00074 exnStarts[j] = psl->qSize - exnEnds[j];
00075 exnEnds[j] = temp;
00076 }
00077 reverseInts(exnEnds, exnCnt);
00078 reverseInts(exnStarts, exnCnt);
00079 }
00080
00081 dna = cloneString(tSeq->dna);
00082
00083 if (qName == NULL)
00084 qName = psl->qName;
00085 if (tName == NULL)
00086 tName = psl->tName;
00087
00088
00089 fputs("Matching bases are colored blue and capitalized. "
00090 "Light blue bases mark the boundaries of gaps in either aligned sequence. "
00091 "Red bases are unaligned exons' bases of the query gene. \n", f);
00092
00093 fprintf(f, "<H4><A NAME=cDNA></A>%s%s</H4>\n", qName, (qIsRc ? " (reverse complemented)" : ""));
00094 fprintf(f, "<PRE><TT>");
00095 tolowers(qLetters);
00096
00097
00098 {
00099 struct cfm *cfm;
00100 char *colorFlags = needMem(qSeq->size);
00101 int i = 0, j = 0, exnIdx = 0;
00102 int preStop = 0;
00103
00104 for (i=0; i<psl->blockCount; ++i)
00105 {
00106 int qs = psl->qStarts[i] - qStart;
00107 int ts = psl->tStarts[i] - tStart;
00108 int sz = psl->blockSizes[i]-1;
00109 int end = 0;
00110 bool omitExon = FALSE;
00111 while(exnIdx < exnCnt && psl->qStarts[i] > exnEnds[exnIdx])
00112 {
00113 if(omitExon)
00114 {
00115 for( j = exnStarts[exnIdx] - qStart; j < exnEnds[exnIdx]-qStart; j++)
00116 {
00117 colorFlags[j] = socRed;
00118 }
00119 }
00120 exnIdx++;
00121 preStop = exnStarts[exnIdx] - qStart;
00122 omitExon = TRUE;
00123 }
00124
00125
00126 colorFlags[qs] = socBrightBlue;
00127 qLetters[qs] = toupper(qLetters[qs]);
00128 colorFlags[qs+sz] = socBrightBlue;
00129 qLetters[qs+sz] = toupper(qLetters[qs+sz]);
00130
00131
00132 if( i < psl->blockCount -1)
00133 end = psl->qStarts[i+1] < exnEnds[exnIdx] ? psl->qStarts[i+1] - qStart : exnEnds[exnIdx] - qStart;
00134 else
00135 end = qs + sz;
00136
00137 for (j=preStop; j < end; j++)
00138 {
00139 if(j == 82)
00140 fprintf(stderr, "right here\n");
00141 if (j > qs && j < qs+sz)
00142 {
00143 if (qSeq->dna[j] == tSeq->dna[ts+j-qs])
00144 {
00145 colorFlags[j] = socBlue;
00146 qLetters[j] = toupper(qLetters[j]);
00147 }
00148 }
00149 else if(colorFlags[j] != socBrightBlue && colorFlags[j] != socBlue)
00150 colorFlags[j] = socRed;
00151 }
00152 preStop = end;
00153 }
00154 cfm = cfmNew(10, 60, TRUE, qIsRc, f, qcfmStart);
00155 for (i=0; i<qSize; ++i)
00156 cfmOut(cfm, qLetters[i], seqOutColorLookup[(int)colorFlags[i]]);
00157 cfmFree(&cfm);
00158 freez(&colorFlags);
00159 htmHorizontalLine(f);
00160 }
00161 fprintf(f, "</TT></PRE>\n");
00162 fprintf(f, "<H4><A NAME=genomic></A>%s %s:</H4>\n",
00163 tName, (tIsRc ? "(reverse strand)" : ""));
00164 fprintf(f, "<PRE><TT>");
00165
00166
00167 {
00168 struct cfm *cfm;
00169 char *colorFlags = needMem(tSeq->size);
00170 int i,j;
00171 int curBlock = 0;
00172
00173 for (i=0; i<psl->blockCount; ++i)
00174 {
00175 int qs = psl->qStarts[i] - qStart;
00176 int ts = psl->tStarts[i] - tStart;
00177 int sz = psl->blockSizes[i];
00178 if (isProt)
00179 {
00180 for (j=0; j<sz; ++j)
00181 {
00182 AA aa = qSeq->dna[qs+j];
00183 int codonStart = ts + 3*j;
00184 DNA *codon = &tSeq->dna[codonStart];
00185 AA trans = lookupCodon(codon);
00186 if (trans != 'X' && trans == aa)
00187 {
00188 colorFlags[codonStart] = socBlue;
00189 colorFlags[codonStart+1] = socBlue;
00190 colorFlags[codonStart+2] = socBlue;
00191 toUpperN(dna+codonStart, 3);
00192 }
00193 }
00194 }
00195 else
00196 {
00197 for (j=0; j<sz; ++j)
00198 {
00199 if (qSeq->dna[qs+j] == tSeq->dna[ts+j])
00200 {
00201 colorFlags[ts+j] = socBlue;
00202 dna[ts+j] = toupper(dna[ts+j]);
00203 }
00204 }
00205 }
00206 colorFlags[ts] = socBrightBlue;
00207 colorFlags[ts+sz*mulFactor-1] = socBrightBlue;
00208 }
00209
00210 cfm = cfmNew(10, 60, TRUE, tIsRc, f, tcfmStart);
00211
00212 for (i=0; i<tSeq->size; ++i)
00213 {
00214
00215
00216 if (curBlock < psl->blockCount && psl->tStarts[curBlock] == (i + tStart) )
00217 {
00218 fprintf(f, "<A NAME=%d></A>", ++curBlock);
00219
00220 while (curBlock < psl->blockCount &&
00221 psl->tStarts[curBlock] <= tStart + i)
00222 curBlock++;
00223 }
00224 cfmOut(cfm, dna[i], seqOutColorLookup[(int)colorFlags[i]]);
00225 }
00226 cfmFree(&cfm);
00227 freez(&colorFlags);
00228 htmHorizontalLine(f);
00229 }
00230
00231
00232 fprintf(f, "</TT></PRE>\n");
00233 fprintf(f, "<H4><A NAME=ali></A>Side by Side Alignment*</H4>\n");
00234 fprintf(f, "<PRE><TT>");
00235 {
00236 struct baf baf;
00237 int i,j;
00238
00239 bafInit(&baf, qSeq->dna, qbafStart, qIsRc,
00240 tSeq->dna, tbafStart, tIsRc, f, 60, isProt);
00241
00242 if (isProt)
00243 {
00244 for (i=0; i<psl->blockCount; ++i)
00245 {
00246 int qs = psl->qStarts[i] - qStart;
00247 int ts = psl->tStarts[i] - tStart;
00248 int sz = psl->blockSizes[i];
00249
00250 bafSetPos(&baf, qs, ts);
00251 bafStartLine(&baf);
00252 for (j=0; j<sz; ++j)
00253 {
00254 AA aa = qSeq->dna[qs+j];
00255 int codonStart = ts + 3*j;
00256 DNA *codon = &tSeq->dna[codonStart];
00257 bafOut(&baf, ' ', codon[0]);
00258 bafOut(&baf, aa, codon[1]);
00259 bafOut(&baf, ' ', codon[2]);
00260 }
00261 bafFlushLine(&baf);
00262 }
00263 fprintf( f, "<I>*when aa is different, BLOSUM positives are in green, BLOSUM negatives in red</I>\n");
00264 }
00265 else
00266 {
00267 int lastQe = psl->qStarts[0] - qStart;
00268 int lastTe = psl->tStarts[0] - tStart;
00269 int maxSkip = 20;
00270 bafSetPos(&baf, lastQe, lastTe);
00271 bafStartLine(&baf);
00272 for (i=0; i<psl->blockCount; ++i)
00273 {
00274 int qs = psl->qStarts[i] - qStart;
00275 int ts = psl->tStarts[i] - tStart;
00276 int sz = psl->blockSizes[i];
00277 boolean doBreak = TRUE;
00278 int qSkip = qs - lastQe;
00279 int tSkip = ts - lastTe;
00280
00281 if (qSkip >= 0 && qSkip <= maxSkip && tSkip == 0)
00282 {
00283 for (j=0; j<qSkip; ++j)
00284 bafOut(&baf, qSeq->dna[lastQe+j], '-');
00285 doBreak = FALSE;
00286 }
00287 else if (tSkip > 0 && tSkip <= maxSkip && qSkip == 0)
00288 {
00289 for (j=0; j<tSkip; ++j)
00290 bafOut(&baf, '-', tSeq->dna[lastTe+j]);
00291 doBreak = FALSE;
00292 }
00293 if (doBreak)
00294 {
00295 bafFlushLine(&baf);
00296 bafSetPos(&baf, qs, ts);
00297 bafStartLine(&baf);
00298 }
00299 for (j=0; j<sz; ++j)
00300 bafOut(&baf, qSeq->dna[qs+j], tSeq->dna[ts+j]);
00301 lastQe = qs + sz;
00302 lastTe = ts + sz;
00303 }
00304 bafFlushLine(&baf);
00305
00306 fprintf( f, "<I>*Aligned Blocks with gaps <= %d bases are merged for this display</I>\n", maxSkip);
00307 }
00308 }
00309 fprintf(f, "</TT></PRE>");
00310 if (qIsRc)
00311 reverseComplement(qSeq->dna, qSeq->size);
00312 if (tIsRc)
00313 reverseComplement(tSeq->dna, tSeq->size);
00314 freeMem(dna);
00315 freeMem(qLetters);
00316 }
00317
00318
00319 int pslGenoShowAlignment(struct psl *psl, boolean isProt,
00320 char *qName, bioSeq *qSeq, int qStart, int qEnd,
00321 char *tName, bioSeq *tSeq, int tStart, int tEnd, int exnStarts[], int exnEnds[], int exnCnt, FILE *f)
00322
00323
00324 {
00325
00326
00327 char origStrand[2];
00328 boolean needsSwap = (psl->strand[0] == '-' && psl->strand[1] == 0);
00329 if (needsSwap)
00330 {
00331 memcpy(origStrand, psl->strand, 2);
00332 pslRcBoth(psl);
00333 psl->strand[0] = '+';
00334 psl->strand[1] = '-';
00335 }
00336 pslShowAlignmentStranded2(psl, isProt, qName, qSeq, qStart, qEnd,
00337 tName, tSeq, tStart, tEnd,exnStarts, exnEnds, exnCnt, f);
00338 if (needsSwap)
00339 {
00340 pslRcBoth(psl);
00341 memcpy(psl->strand, origStrand, 2);
00342 }
00343 return psl->blockCount;
00344 }
00345