inc/xenalign.h File Reference

#include "nt4.h"

Include dependency graph for xenalign.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

int xenAlignSmall (DNA *query, int querySize, DNA *target, int targetSize, FILE *f, boolean printExtraAtEnds)
void xenStitch (char *inName, FILE *out, int stitchMinScore, boolean compactOutput)
void xenStitcher (char *inName, FILE *out, int stitchMinScore, boolean compactOutput)
void xenAlignBig (DNA *query, int qSize, DNA *target, int tSize, FILE *f, boolean forHtml)
void xenShowAli (char *qSym, char *tSym, char *hSym, int symCount, FILE *f, int qOffset, int tOffset, char qStrand, char tStrand, int maxLineSize)
void xenAlignWorm (DNA *query, int qSize, FILE *f, boolean forHtml)


Function Documentation

void xenAlignBig ( DNA query,
int  qSize,
DNA target,
int  tSize,
FILE *  f,
boolean  forHtml 
)

Definition at line 1055 of file xenbig.c.

References crudeAliFind(), crudeAli::end, FALSE, tempName::forCgi, freeNt4(), makeTempName(), mustOpen(), newNt4(), crudeAli::next, reverseComplement(), slFreeList(), crudeAli::start, crudeAli::strand, xenAlignSmall(), and xenStitcher().

01057 {
01058 struct crudeAli *caList, *ca;
01059 struct nt4Seq *nt4 = newNt4(target, tSize, "target");
01060 struct tempName dynTn;
01061 int i;
01062 int lqSize;
01063 int qMaxSize = 2000;
01064 int tMaxSize = 2*qMaxSize;
01065 int lastQsection = qSize - qMaxSize/2;
01066 FILE *dynFile;
01067 int tMin, tMax, tMid;
01068 int score;
01069 int totalAli = 0;
01070 double totalSize;
01071 
01072 totalSize = (double)qSize * tSize;
01073 if (totalSize < 10000000.0)
01074     {
01075     xenAlignSmall(query, qSize, target, tSize, f, forHtml);
01076     return;
01077     }
01078 makeTempName(&dynTn, "xen", ".dyn");
01079 dynFile = mustOpen(dynTn.forCgi, "w");
01080 
01081 for (i = 0; i<lastQsection || i == 0; i += qMaxSize/2)
01082     {
01083     lqSize = qSize - i;
01084     if (lqSize > qMaxSize)
01085         lqSize = qMaxSize;
01086     caList = crudeAliFind(query+i, lqSize, &nt4, 1, 8, 12);
01087     if (forHtml)
01088         {
01089         fputc('.', stdout);
01090         fflush(stdout);
01091         }
01092     for (ca = caList; ca != NULL; ca = ca->next)
01093         {
01094         tMid = (ca->start + ca->end)/2;
01095         tMin = tMid-tMaxSize/2;
01096         tMax = tMin + tMaxSize;
01097         if (tMin < 0)
01098             tMin = 0;
01099         if (tMax > tSize)
01100             tMax = tSize;
01101         
01102         fprintf(dynFile, "Aligning query:%d-%d %c to target:%d-%d +\n",
01103             i, i+lqSize, ca->strand, tMin, tMax);
01104         if (ca->strand == '-')
01105             {
01106             reverseComplement(query+i, lqSize);
01107             }
01108         score = xenAlignSmall(query+i, lqSize, target+tMin, tMax-tMin, dynFile, FALSE);        
01109         if (ca->strand == '-')
01110             reverseComplement(query+i, lqSize);
01111         fprintf(dynFile, "best score %d\n", score);
01112         if (forHtml)
01113             {
01114             fputc('.', stdout);
01115             fflush(stdout);
01116             }
01117         ++totalAli;
01118         }
01119     slFreeList(&caList);
01120     }
01121 
01122 freeNt4(&nt4);
01123 fclose(dynFile);
01124 if (forHtml)
01125     {
01126     printf("\n %d alignments to stitch.\n", totalAli);
01127     }
01128 xenStitcher(dynTn.forCgi, f, 150000, FALSE);
01129 remove(dynTn.forCgi);
01130 }

Here is the call graph for this function:

int xenAlignSmall ( DNA query,
int  querySize,
DNA target,
int  targetSize,
FILE *  f,
boolean  printExtraAtEnds 
)

Definition at line 214 of file xensmall.c.

References c1c2MatchTable, c1Ix, c2Ix, c3Ix, c3MatchTable, calcCostBenefit(), calcGcRatio(), codingFromHiFiCost, codingFromInsertCost, codingFromLoFiCost, codingSmallGapCost, endState, errAbort(), hiFiFromCodingCost, hiFiFromInsertCost, hiFiFromLoFiCost, hiFiIx, hiFiMatchTable, hiFiSmallGapCost, insertFromCodingCost, insertFromHiFiCost, insertFromLoFiCost, largeGapExtensionCost, phmmState::lastScores, letterIx, loFiFromCodingCost, loFiFromHiFiCost, loFiFromInsertCost, loFiIx, loFiMatchTable, loFiSmallGapCost, matchState, phmmMommy::mommy, phmmMatrixNew(), phmmNameState(), phmmMatrix::qDim, qSlipIx, qSlipState, phmmMatrix::query, shortEndState, startState, phmmMatrix::stateCount, phmmMatrix::states, phmmMatrix::target, phmmMatrix::tDim, tSlipIx, tSlipState, and UBYTE.

Referenced by xenAlignBig(), and xenAlignWorm().

00218 {
00219 struct phmmMatrix *a;
00220 struct phmmState *hf, *lf, *iq, *it, *c1, *c2, *c3;
00221 int qIx, tIx, sIx;  /* Query, target, and state indices */
00222 int rowOffset, newCellOffset;
00223 struct phmmAliPair *pairList;
00224 int matchOff, qSlipOff, tSlipOff;
00225 int bestScore = -0x4fffffff;
00226 struct phmmMommy *bestCell = NULL;
00227 int c1c2PairScore, c3PairScore, loFiPairScore, hiFiPairScore;
00228 int matchTableOffset;
00229 double gcRatio;
00230 
00231 /* Check that it's not too big. */
00232 if ((double)targetSize * querySize > 1.1E7)
00233     errAbort("Can't align %d x %d, too big\n", querySize, targetSize);
00234 
00235 /* Set up graph edge costs/benefits */
00236 gcRatio = calcGcRatio(query, querySize, target, targetSize);
00237 calcCostBenefit(gcRatio);
00238 
00239 /* Initialize 7 state matrix. */
00240 a = phmmMatrixNew(7, query, querySize, target, targetSize);
00241 hf = phmmNameState(a, hiFiIx, "highFi", 'H');
00242 lf = phmmNameState(a, loFiIx, "lowFi", 'L');
00243 iq = phmmNameState(a, qSlipIx, "qSlip", 'Q');
00244 it = phmmNameState(a, tSlipIx, "tSlip", 'T');
00245 c1 = phmmNameState(a, c1Ix, "frame1", '1');
00246 c2 = phmmNameState(a, c2Ix, "frame2", '2');
00247 c3 = phmmNameState(a, c3Ix, "frame3", '3');
00248 
00249 qSlipOff = -a->qDim;
00250 tSlipOff = -1;
00251 matchOff = qSlipOff + tSlipOff;
00252 
00253 for (tIx = 1; tIx < a->tDim; tIx += 1)
00254     {
00255     UBYTE mommy = 0;
00256     int score, tempScore;
00257 
00258 /* Macros to make me less mixed up when accessing scores from row arrays.*/
00259 #define matchScore lastScores[qIx-1]
00260 #define qSlipScore lastScores[qIx]
00261 #define tSlipScore scores[qIx-1]
00262 #define newScore scores[qIx]
00263 
00264 /* Start up state block (with all ways to enter state) */
00265 #define startState(state) \
00266    score = 0;
00267 
00268 /* Define a transition from state while advancing over both
00269  * target and query. */
00270 #define matchState(state, addScore) \
00271    { \
00272    if ((tempScore = state->matchScore + addScore) > score) \
00273         { \
00274         mommy = phmmPackMommy(state->stateIx, -1, -1); \
00275         score = tempScore; \
00276         } \
00277    } 
00278 
00279 /* Define a transition from state while slipping query
00280  * and advancing target. */
00281 #define qSlipState(state, addScore) \
00282    { \
00283    if ((tempScore = state->qSlipScore + addScore) > score) \
00284         { \
00285         mommy = phmmPackMommy(state->stateIx, 0, -1); \
00286         score = tempScore; \
00287         } \
00288    }
00289 
00290 /* Define a transition from state while slipping target
00291  * and advancing query. */
00292 #define tSlipState(state, addScore) \
00293    { \
00294    if ((tempScore = state->tSlipScore + addScore) > score) \
00295         { \
00296         mommy = phmmPackMommy(state->stateIx, -1, 0); \
00297         score = tempScore; \
00298         } \
00299    }
00300 
00301 /* End a block of transitions into state. */
00302 #define endState(state) \
00303     { \
00304     struct phmmMommy *newCell = state->cells + newCellOffset; \
00305     if (score <= 0) \
00306         { \
00307         mommy = phmmNullMommy; \
00308         score = 0; \
00309         } \
00310     newCell->mommy = mommy; \
00311     state->newScore = score; \
00312     if (score > bestScore) \
00313         { \
00314         bestScore = score; \
00315         bestCell = newCell; \
00316         } \
00317     } 
00318 
00319 /* End a state that you know won't produce an optimal
00320  * final score. */
00321 #define shortEndState(state) \
00322     { \
00323     struct phmmMommy *newCell = state->cells + newCellOffset; \
00324     if (score <= 0) \
00325         { \
00326         mommy = phmmNullMommy; \
00327         score = 0; \
00328         } \
00329     newCell->mommy = mommy; \
00330     state->newScore = score; \
00331     }
00332 
00333     rowOffset = tIx*a->qDim;
00334     for (qIx = 1; qIx < a->qDim; qIx += 1)
00335         {
00336         int qBase = letterIx(a->query[qIx-1]);
00337         int tBase = letterIx(a->target[tIx-1]);
00338 
00339         newCellOffset = rowOffset + qIx;
00340         
00341         /* Figure the cost or bonus for pairing target and query residue here. */
00342         matchTableOffset = (qBase<<5) + tBase;
00343         c1c2PairScore = c1c2MatchTable[matchTableOffset];
00344         c3PairScore = c3MatchTable[matchTableOffset];
00345         hiFiPairScore = hiFiMatchTable[matchTableOffset];
00346         loFiPairScore = loFiMatchTable[matchTableOffset];
00347 
00348         /* Update hiFi space. */
00349             {
00350             startState(hf);
00351             matchState(hf, hiFiPairScore);
00352             qSlipState(hf, hiFiSmallGapCost);
00353             tSlipState(hf, hiFiSmallGapCost);
00354             matchState(iq, hiFiPairScore + hiFiFromInsertCost);
00355             matchState(it, hiFiPairScore + hiFiFromInsertCost);
00356             matchState(lf, hiFiPairScore + hiFiFromLoFiCost);
00357             matchState(c1, hiFiPairScore + hiFiFromCodingCost);
00358             matchState(c2, hiFiPairScore + hiFiFromCodingCost);
00359             matchState(c3, hiFiPairScore + hiFiFromCodingCost);
00360             endState(hf);
00361             }
00362 
00363         /* Update loFi space. */
00364             {
00365             startState(lf);
00366             matchState(lf, loFiPairScore);
00367             qSlipState(lf, loFiSmallGapCost);
00368             tSlipState(lf, loFiSmallGapCost);
00369             matchState(iq, loFiPairScore + loFiFromInsertCost);
00370             matchState(it, loFiPairScore + loFiFromInsertCost);
00371             matchState(hf, loFiPairScore + loFiFromHiFiCost);
00372             matchState(c1, loFiPairScore + loFiFromCodingCost);
00373             matchState(c2, loFiPairScore + loFiFromCodingCost);
00374             matchState(c3, loFiPairScore + loFiFromCodingCost);
00375             endState(lf);
00376             }
00377 
00378         /* Update query slip space. */
00379             {
00380             startState(iq);
00381             qSlipState(iq, largeGapExtensionCost);
00382             qSlipState(hf, insertFromHiFiCost);            
00383             qSlipState(lf, insertFromLoFiCost);
00384             qSlipState(c1, insertFromCodingCost);
00385             qSlipState(c2, insertFromCodingCost);
00386             qSlipState(c3, insertFromCodingCost);
00387             qSlipState(it, largeGapExtensionCost); /* Allow double gaps, T first always. */
00388             shortEndState(iq);
00389             }
00390         
00391         /* Update target slip space. */
00392             {
00393             startState(it);
00394             tSlipState(it, largeGapExtensionCost);
00395             tSlipState(hf, insertFromHiFiCost);            
00396             tSlipState(lf, insertFromLoFiCost);
00397             tSlipState(c1, insertFromCodingCost);
00398             tSlipState(c2, insertFromCodingCost);
00399             tSlipState(c3, insertFromCodingCost);
00400             shortEndState(it);
00401             }
00402 
00403         /* Update coding1 space. */
00404             {
00405             startState(c1);
00406             matchState(c3, c1c2PairScore);
00407             qSlipState(c1, codingSmallGapCost);
00408             tSlipState(c1, codingSmallGapCost);
00409             matchState(iq, c1c2PairScore + codingFromInsertCost);
00410             matchState(it, c1c2PairScore + codingFromInsertCost);
00411             matchState(lf, c1c2PairScore + codingFromLoFiCost);
00412             matchState(hf, c1c2PairScore + codingFromHiFiCost);
00413             endState(c1);
00414             }
00415 
00416         /* Update coding2 space. */
00417             {
00418             startState(c2);
00419             matchState(c1, c1c2PairScore);
00420             qSlipState(c2, codingSmallGapCost);
00421             tSlipState(c2, codingSmallGapCost);
00422             matchState(iq, c1c2PairScore + codingFromInsertCost);
00423             matchState(it, c1c2PairScore + codingFromInsertCost);
00424             matchState(lf, c1c2PairScore + codingFromLoFiCost);
00425             matchState(hf, c1c2PairScore + codingFromHiFiCost);
00426             endState(c2);
00427             }
00428 
00429 
00430         /* Update coding3 space. */
00431             {
00432             startState(c3);
00433             matchState(c2, c3PairScore);
00434             qSlipState(c3, codingSmallGapCost);
00435             tSlipState(c3, codingSmallGapCost);
00436             matchState(iq, c3PairScore + codingFromInsertCost);
00437             matchState(it, c3PairScore + codingFromInsertCost);
00438             matchState(lf, c3PairScore + codingFromLoFiCost);
00439             matchState(hf, c3PairScore + codingFromHiFiCost);
00440             endState(c3);
00441             }
00442         }
00443     /* Swap score columns so current becomes last, and last gets
00444      * reused. */
00445     for (sIx = 0; sIx < a->stateCount; ++sIx)
00446         {
00447         struct phmmState *as = &a->states[sIx];
00448         int *swapTemp = as->lastScores;
00449         as->lastScores = as->scores;
00450         as->scores = swapTemp;
00451         }
00452     }
00453 
00454 /* Trace back from best scoring cell. */
00455 pairList = phmmTraceBack(a, bestCell);
00456 phmmPrintTrace(a, pairList, TRUE, f, printExtraAtEnds);
00457 
00458 slFreeList(&pairList);
00459 phmmMatrixFree(&a);
00460 return bestScore;
00461 #undef matchScore
00462 #undef qSlipScore
00463 #undef tSlipScore
00464 #undef newScore
00465 #undef startState
00466 #undef matchState
00467 #undef qSlipState
00468 #undef tSlipState
00469 #undef shortEndState
00470 #undef endState
00471 }

Here is the call graph for this function:

Here is the caller graph for this function:

void xenAlignWorm ( DNA query,
int  qSize,
FILE *  f,
boolean  forHtml 
)

Definition at line 1133 of file xenbig.c.

References chromNames, crudeAliFind(), FALSE, tempName::forCgi, freez(), makeTempName(), mustOpen(), crudeAli::next, reverseComplement(), slCat(), slCount(), slFreeList(), wormChromNames(), wormChromPart(), wormClipRangeToChrom(), wormFreeNt4Genome(), wormLoadNt4Genome(), xenAlignSmall(), and xenStitcher().

01135  {
01136 struct crudeAli *caList = NULL, *ca, *newCa;
01137 struct tempName dynTn;
01138 int i;
01139 int lqSize;
01140 int qMaxSize = 2000;
01141 int tMaxSize = 2*qMaxSize;
01142 int lastQsection = qSize - qMaxSize/2;
01143 FILE *dynFile;
01144 int tMin, tMax, tMid, tSize;
01145 int score;
01146 int chromCount;
01147 char **chromNames;
01148 struct nt4Seq **chrom;
01149 DNA *target;
01150 int sectionCount = 0;
01151 
01152 /* Get C. elegans genome and do crude alignments. */
01153 wormLoadNt4Genome(&chrom, &chromCount);
01154 wormChromNames(&chromNames, &chromCount);
01155 for (i = 0; i<lastQsection || i == 0; i += qMaxSize/2)
01156     {
01157     if (forHtml)
01158         {
01159         fputc('.', stdout);
01160         fflush(stdout);
01161         }
01162     lqSize = qSize - i;
01163     if (lqSize > qMaxSize)
01164         lqSize = qMaxSize;
01165     newCa = crudeAliFind(query+i, lqSize, chrom, chromCount, 8, 45);
01166     for (ca = newCa; ca != NULL; ca = ca->next)
01167         {
01168         ca->qStart = i;
01169         ca->qEnd = i+lqSize;
01170         }
01171     caList = slCat(caList,newCa);
01172     ++sectionCount;
01173     }
01174 wormFreeNt4Genome(&chrom);
01175 
01176 /* Make temporary output file. */
01177 makeTempName(&dynTn, "xen", ".dyn");
01178 dynFile = mustOpen(dynTn.forCgi, "w");
01179 
01180 /* Do dynamic programming alignment on each crude alignment. */
01181 if (forHtml)
01182     {
01183     int crudeCount = slCount(caList);
01184     if (crudeCount > 2*sectionCount)
01185         {
01186         printf("\nThis is a difficult alignment.  It looks like the query aligns with\n"
01187                "the <I>C. elegans</I> genome in %d places.  It will take about %1.0f more\n"
01188                "minutes to finish.\n", (crudeCount+sectionCount/2)/sectionCount, crudeCount*0.5);
01189         }
01190     fflush(stdout);
01191     }
01192 for (ca = caList; ca != NULL; ca = ca->next)
01193     {
01194     if (forHtml)
01195         {
01196         fputc('.', stdout);
01197         fflush(stdout);
01198         }
01199     tMid = (ca->start + ca->end)/2;
01200     tMin = tMid-tMaxSize/2;
01201     tMax = tMin + tMaxSize;
01202     wormClipRangeToChrom(chromNames[ca->chromIx], &tMin, &tMax);
01203     fprintf(dynFile, "Aligning query:%d-%d %c to %s:%d-%d +\n",
01204         ca->qStart, ca->qEnd, ca->strand, chromNames[ca->chromIx], tMin, tMax);
01205     lqSize = ca->qEnd - ca->qStart;
01206     if (ca->strand == '-')
01207         reverseComplement(query+ca->qStart, lqSize);
01208     tSize = tMax - tMin;
01209     target = wormChromPart(chromNames[ca->chromIx], tMin, tSize);
01210     score = xenAlignSmall(query+ca->qStart, lqSize, target, tSize, dynFile, FALSE);        
01211     freez(&target);
01212     if (ca->strand == '-')
01213         reverseComplement(query+ca->qStart, lqSize);
01214     fprintf(dynFile, "best score %d\n", score);
01215     }
01216 slFreeList(&caList);
01217 fclose(dynFile);
01218 
01219 if (forHtml)
01220     printf("\n");
01221 xenStitcher(dynTn.forCgi, f, 150000, FALSE);
01222 remove(dynTn.forCgi);
01223 }

Here is the call graph for this function:

void xenShowAli ( char *  qSym,
char *  tSym,
char *  hSym,
int  symCount,
FILE *  f,
int  qOffset,
int  tOffset,
char  qStrand,
char  tStrand,
int  maxLineSize 
)

Definition at line 34 of file xenshow.c.

References mustWrite(), nonDashCount(), and printMatchers().

00037 {
00038 int i;
00039 int lineSize;
00040 int count;
00041 
00042 for (i=0; i<symCount; i += lineSize)
00043     {
00044     lineSize = symCount - i;
00045     if (lineSize > maxLineSize) lineSize = maxLineSize;
00046     mustWrite(f, qSym+i, lineSize);
00047     count = nonDashCount(qSym+i, lineSize);
00048     if (qStrand == '-')
00049         count = -count;
00050     qOffset += count;
00051     fprintf(f, " %9d\n", qOffset);
00052     printMatchers(qSym+i, tSym+i, lineSize, f);
00053     fputc('\n', f);
00054     mustWrite(f, tSym+i, lineSize);
00055     count = nonDashCount(tSym+i, lineSize);
00056     if (tStrand == '-')
00057         count = -count;
00058     tOffset += count;
00059     fprintf(f, " %9d\n", tOffset);
00060     mustWrite(f, hSym+i, lineSize);
00061     fputc('\n', f);
00062     fputc('\n', f);
00063     }
00064 }

Here is the call graph for this function:

void xenStitch ( char *  inName,
FILE *  out,
int  stitchMinScore,
boolean  compactOutput 
)

Definition at line 1040 of file xenbig.c.

References TRUE, and xStitch().

01043 {
01044 xStitch(inName, out, stitchMinScore, compactOutput, TRUE);
01045 }

Here is the call graph for this function:

void xenStitcher ( char *  inName,
FILE *  out,
int  stitchMinScore,
boolean  compactOutput 
)

Definition at line 1047 of file xenbig.c.

References FALSE, and xStitch().

Referenced by xenAlignBig(), and xenAlignWorm().

01050 {
01051 xStitch(inName, out, stitchMinScore, compactOutput, FALSE);
01052 }

Here is the call graph for this function:

Here is the caller graph for this function:


Generated on Tue Dec 25 19:20:39 2007 for blat by  doxygen 1.5.2