lib/pslGenoShow.c

Go to the documentation of this file.
00001 /* Show aligned exons between a pre-located gene (a stamper gene) in the genome 
00002  *and its homologues (stamp elements) in the genome. 
00003  *The aligned exon sequences are shown in blue as regular blat alignment. 
00004  * The unaligned exon sequence are shown in red. Intron sequences are shown in black.
00005  * It is modified from pslShow.c */
00006 
00007 
00008 #include "common.h"
00009 #include "dnaseq.h"
00010 #include "htmshell.h"
00011 #include "psl.h"
00012 #include "cda.h"
00013 #include "seqOut.h"
00014 
00015 
00016 static void pslShowAlignmentStranded2(struct psl *psl, boolean isProt,
00017         char *qName, bioSeq *qSeq, int qStart, int qEnd,
00018         char *tName, bioSeq *tSeq, int tStart, int tEnd, int exnStarts[], int exnEnds[], int exnCnt, FILE *f)
00019 /* Show stamper gene and stamp elements alignment using genomic sequence.
00020  * The aligned exons' sequence of stamper gene are shown in colors as usual, but the
00021  * the unaligned exon's sequence of stamper gene are shown in red color.
00022  */
00023 {
00024 boolean tIsRc = (psl->strand[1] == '-');
00025 boolean qIsRc = (psl->strand[0] == '-');
00026 int mulFactor = (isProt ? 3 : 1);
00027 DNA *dna = NULL;        /* Mixed case version of genomic DNA. */
00028 int qSize = qSeq->size;
00029 char *qLetters = cloneString(qSeq->dna);
00030 int qbafStart, qbafEnd, tbafStart, tbafEnd;
00031 int qcfmStart, qcfmEnd, tcfmStart, tcfmEnd;
00032 
00033 tbafStart = psl->tStart;
00034 tbafEnd   = psl->tEnd;
00035 tcfmStart = psl->tStart;
00036 tcfmEnd   = psl->tEnd;
00037 
00038 qbafStart = qStart;
00039 qbafEnd   = qEnd;
00040 qcfmStart = qStart;
00041 qcfmEnd   = qEnd;
00042 
00043 /* Deal with minus strand. */
00044 if (tIsRc)
00045     {
00046     int temp;
00047     reverseComplement(tSeq->dna, tSeq->size);
00048     temp = psl->tSize - tEnd;
00049     tEnd = psl->tSize - tStart;
00050     tStart = temp;
00051     
00052     tbafStart = psl->tEnd;
00053     tbafEnd   = psl->tStart;
00054     tcfmStart = psl->tEnd;
00055     tcfmEnd   = psl->tStart;
00056     }
00057 if (qIsRc)
00058     {
00059     int temp, j;
00060     reverseComplement(qSeq->dna, qSeq->size);
00061     reverseComplement(qLetters, qSeq->size);
00062 
00063     qcfmStart = qEnd;
00064     qcfmEnd   = qStart;
00065     qbafStart = qEnd;
00066     qbafEnd   = qStart;
00067     
00068     temp = psl->qSize - qEnd;
00069     qEnd = psl->qSize - qStart;
00070     qStart = temp;
00071     for(j = 0; j < exnCnt; j++)
00072         {
00073         temp = psl->qSize - exnStarts[j];
00074         exnStarts[j] = psl->qSize - exnEnds[j];
00075         exnEnds[j] = temp;
00076         }
00077     reverseInts(exnEnds, exnCnt);
00078     reverseInts(exnStarts, exnCnt);
00079     }
00080 
00081 dna = cloneString(tSeq->dna);
00082 
00083 if (qName == NULL) 
00084     qName = psl->qName;
00085 if (tName == NULL)
00086     tName = psl->tName;
00087 
00088 
00089 fputs("Matching bases are colored blue and capitalized. " 
00090       "Light blue bases mark the boundaries of gaps in either aligned sequence. "
00091       "Red bases are unaligned exons' bases of the query gene. \n", f);
00092 
00093 fprintf(f, "<H4><A NAME=cDNA></A>%s%s</H4>\n", qName, (qIsRc  ? " (reverse complemented)" : ""));
00094 fprintf(f, "<PRE><TT>");
00095 tolowers(qLetters);
00096 
00097 /* Display query sequence. */
00098     {
00099     struct cfm *cfm;
00100     char *colorFlags = needMem(qSeq->size);
00101     int i = 0, j = 0, exnIdx = 0;
00102     int preStop = 0;
00103     
00104     for (i=0; i<psl->blockCount; ++i)
00105         {
00106         int qs = psl->qStarts[i] - qStart;
00107         int ts = psl->tStarts[i] - tStart;
00108         int sz = psl->blockSizes[i]-1;
00109         int end = 0;
00110         bool omitExon = FALSE;
00111         while(exnIdx < exnCnt && psl->qStarts[i] > exnEnds[exnIdx])
00112             {
00113             if(omitExon)
00114                 {
00115                 for( j = exnStarts[exnIdx] - qStart; j < exnEnds[exnIdx]-qStart; j++)
00116                     {
00117                     colorFlags[j] = socRed;
00118                     }
00119                 }
00120             exnIdx++;
00121             preStop = exnStarts[exnIdx] - qStart;
00122             omitExon = TRUE;
00123             }
00124 
00125         /*mark the boundary bases */
00126         colorFlags[qs] = socBrightBlue;
00127         qLetters[qs] = toupper(qLetters[qs]);
00128         colorFlags[qs+sz] = socBrightBlue;
00129         qLetters[qs+sz] = toupper(qLetters[qs+sz]);
00130         
00131         /* determine block end */
00132         if( i < psl->blockCount -1)
00133             end = psl->qStarts[i+1] < exnEnds[exnIdx] ? psl->qStarts[i+1] - qStart : exnEnds[exnIdx] - qStart;
00134         else
00135             end = qs + sz;
00136             
00137         for (j=preStop; j < end; j++)
00138             {
00139             if(j == 82)
00140                 fprintf(stderr, "right here\n");
00141             if (j > qs && j < qs+sz)
00142                 {
00143                 if (qSeq->dna[j] == tSeq->dna[ts+j-qs])
00144                     {
00145                     colorFlags[j] = socBlue;
00146                     qLetters[j] = toupper(qLetters[j]);
00147                     }           
00148                 }
00149             else if(colorFlags[j] != socBrightBlue && colorFlags[j] != socBlue)
00150                 colorFlags[j] = socRed;
00151             }
00152         preStop = end;
00153         }
00154     cfm = cfmNew(10, 60, TRUE, qIsRc, f, qcfmStart);
00155     for (i=0; i<qSize; ++i)
00156         cfmOut(cfm, qLetters[i], seqOutColorLookup[(int)colorFlags[i]]);
00157     cfmFree(&cfm);
00158     freez(&colorFlags);
00159     htmHorizontalLine(f);
00160     }
00161 fprintf(f, "</TT></PRE>\n");
00162 fprintf(f, "<H4><A NAME=genomic></A>%s %s:</H4>\n", 
00163         tName, (tIsRc ? "(reverse strand)" : ""));
00164 fprintf(f, "<PRE><TT>");
00165 
00166 /* Display DNA sequence. */
00167     {
00168     struct cfm *cfm;
00169     char *colorFlags = needMem(tSeq->size);
00170     int i,j;
00171     int curBlock = 0;
00172 
00173     for (i=0; i<psl->blockCount; ++i)
00174         {
00175         int qs = psl->qStarts[i] - qStart;
00176         int ts = psl->tStarts[i] - tStart;
00177         int sz = psl->blockSizes[i];
00178         if (isProt)
00179             {
00180             for (j=0; j<sz; ++j)
00181                 {
00182                 AA aa = qSeq->dna[qs+j];
00183                 int codonStart = ts + 3*j;
00184                 DNA *codon = &tSeq->dna[codonStart];
00185                 AA trans = lookupCodon(codon);
00186                 if (trans != 'X' && trans == aa)
00187                     {
00188                     colorFlags[codonStart] = socBlue;
00189                     colorFlags[codonStart+1] = socBlue;
00190                     colorFlags[codonStart+2] = socBlue;
00191                     toUpperN(dna+codonStart, 3);
00192                     }
00193                 }
00194             }
00195         else
00196             {
00197             for (j=0; j<sz; ++j)
00198                 {
00199                 if (qSeq->dna[qs+j] == tSeq->dna[ts+j])
00200                     {
00201                     colorFlags[ts+j] = socBlue;
00202                     dna[ts+j] = toupper(dna[ts+j]);
00203                     }
00204                 }
00205             }
00206         colorFlags[ts] = socBrightBlue;
00207         colorFlags[ts+sz*mulFactor-1] = socBrightBlue;
00208         }
00209 
00210     cfm = cfmNew(10, 60, TRUE, tIsRc, f, tcfmStart);
00211         
00212     for (i=0; i<tSeq->size; ++i)
00213         {
00214         /* Put down "anchor" on first match position in haystack
00215          * so user can hop here with a click on the needle. */
00216         if (curBlock < psl->blockCount && psl->tStarts[curBlock] == (i + tStart) )
00217             {
00218             fprintf(f, "<A NAME=%d></A>", ++curBlock);
00219             /* Watch out for (rare) out-of-order tStarts! */
00220             while (curBlock < psl->blockCount &&
00221                    psl->tStarts[curBlock] <= tStart + i)
00222                 curBlock++;
00223             }
00224         cfmOut(cfm, dna[i], seqOutColorLookup[(int)colorFlags[i]]);
00225         }
00226     cfmFree(&cfm);
00227     freez(&colorFlags);
00228     htmHorizontalLine(f);
00229     }
00230 
00231 /* Display side by side. */
00232 fprintf(f, "</TT></PRE>\n");
00233 fprintf(f, "<H4><A NAME=ali></A>Side by Side Alignment*</H4>\n");
00234 fprintf(f, "<PRE><TT>");
00235     {
00236     struct baf baf;
00237     int i,j;
00238 
00239     bafInit(&baf, qSeq->dna, qbafStart, qIsRc,
00240             tSeq->dna, tbafStart, tIsRc, f, 60, isProt);
00241                 
00242     if (isProt)
00243         {
00244         for (i=0; i<psl->blockCount; ++i)
00245             {
00246             int qs = psl->qStarts[i] - qStart;
00247             int ts = psl->tStarts[i] - tStart;
00248             int sz = psl->blockSizes[i];
00249 
00250             bafSetPos(&baf, qs, ts);
00251             bafStartLine(&baf);
00252             for (j=0; j<sz; ++j)
00253                 {
00254                 AA aa = qSeq->dna[qs+j];
00255                 int codonStart = ts + 3*j;
00256                 DNA *codon = &tSeq->dna[codonStart];
00257                 bafOut(&baf, ' ', codon[0]);
00258                 bafOut(&baf, aa, codon[1]);
00259                 bafOut(&baf, ' ', codon[2]);
00260                 }
00261             bafFlushLine(&baf);
00262             }
00263         fprintf( f, "<I>*when aa is different, BLOSUM positives are in green, BLOSUM negatives in red</I>\n");
00264         }
00265     else
00266         {
00267         int lastQe = psl->qStarts[0] - qStart;
00268         int lastTe = psl->tStarts[0] - tStart;
00269         int maxSkip = 20;
00270         bafSetPos(&baf, lastQe, lastTe);
00271         bafStartLine(&baf);
00272         for (i=0; i<psl->blockCount; ++i)
00273             {
00274             int qs = psl->qStarts[i] - qStart;
00275             int ts = psl->tStarts[i] - tStart;
00276             int sz = psl->blockSizes[i];
00277             boolean doBreak = TRUE;
00278             int qSkip = qs - lastQe;
00279             int tSkip = ts - lastTe;
00280 
00281             if (qSkip >= 0 && qSkip <= maxSkip && tSkip == 0)
00282                 {
00283                 for (j=0; j<qSkip; ++j)
00284                     bafOut(&baf, qSeq->dna[lastQe+j], '-');
00285                 doBreak = FALSE;
00286                 }
00287             else if (tSkip > 0 && tSkip <= maxSkip && qSkip == 0)
00288                 {
00289                 for (j=0; j<tSkip; ++j)
00290                     bafOut(&baf, '-', tSeq->dna[lastTe+j]);
00291                 doBreak = FALSE;
00292                 }
00293             if (doBreak)
00294                 {
00295                 bafFlushLine(&baf);
00296                 bafSetPos(&baf, qs, ts);
00297                 bafStartLine(&baf);
00298                 }
00299             for (j=0; j<sz; ++j)
00300                 bafOut(&baf, qSeq->dna[qs+j], tSeq->dna[ts+j]);
00301             lastQe = qs + sz;
00302             lastTe = ts + sz;
00303             }
00304         bafFlushLine(&baf);
00305 
00306         fprintf( f, "<I>*Aligned Blocks with gaps <= %d bases are merged for this display</I>\n", maxSkip);
00307         }
00308     }
00309 fprintf(f, "</TT></PRE>");
00310 if (qIsRc)
00311     reverseComplement(qSeq->dna, qSeq->size);
00312 if (tIsRc)
00313     reverseComplement(tSeq->dna, tSeq->size);
00314 freeMem(dna);
00315 freeMem(qLetters);
00316 }
00317 
00318 
00319 int pslGenoShowAlignment(struct psl *psl, boolean isProt,
00320         char *qName, bioSeq *qSeq, int qStart, int qEnd,
00321         char *tName, bioSeq *tSeq, int tStart, int tEnd, int exnStarts[], int exnEnds[], int exnCnt, FILE *f)
00322 /* Show aligned exons between a pre-located gene (a stamper gene)and its homologues (stamp elements)
00323  * in the genome. The aligned exon sequences are shown in blue as regular blat alignment. * The unaligned exon sequence are shown in red. Intron sequences are shown in black */
00324 {
00325 /* At this step we just do a little shuffling of the strands for
00326  * untranslated DNA alignments. */
00327 char origStrand[2];
00328 boolean needsSwap = (psl->strand[0] == '-' && psl->strand[1] == 0);
00329 if (needsSwap)
00330     {
00331     memcpy(origStrand, psl->strand, 2);
00332     pslRcBoth(psl);
00333     psl->strand[0] = '+';
00334     psl->strand[1] = '-';
00335     }
00336 pslShowAlignmentStranded2(psl, isProt, qName, qSeq, qStart, qEnd,
00337     tName, tSeq, tStart, tEnd,exnStarts, exnEnds, exnCnt, f);
00338 if (needsSwap)
00339     {
00340     pslRcBoth(psl);
00341     memcpy(psl->strand, origStrand, 2);
00342     }
00343 return psl->blockCount;
00344 }
00345 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2