#include "nt4.h"Include dependency graph for xenalign.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
Functions | |
| int | xenAlignSmall (DNA *query, int querySize, DNA *target, int targetSize, FILE *f, boolean printExtraAtEnds) |
| void | xenStitch (char *inName, FILE *out, int stitchMinScore, boolean compactOutput) |
| void | xenStitcher (char *inName, FILE *out, int stitchMinScore, boolean compactOutput) |
| void | xenAlignBig (DNA *query, int qSize, DNA *target, int tSize, FILE *f, boolean forHtml) |
| void | xenShowAli (char *qSym, char *tSym, char *hSym, int symCount, FILE *f, int qOffset, int tOffset, char qStrand, char tStrand, int maxLineSize) |
| void | xenAlignWorm (DNA *query, int qSize, FILE *f, boolean forHtml) |
Definition at line 1055 of file xenbig.c.
References crudeAliFind(), crudeAli::end, FALSE, tempName::forCgi, freeNt4(), makeTempName(), mustOpen(), newNt4(), crudeAli::next, reverseComplement(), slFreeList(), crudeAli::start, crudeAli::strand, xenAlignSmall(), and xenStitcher().
01057 { 01058 struct crudeAli *caList, *ca; 01059 struct nt4Seq *nt4 = newNt4(target, tSize, "target"); 01060 struct tempName dynTn; 01061 int i; 01062 int lqSize; 01063 int qMaxSize = 2000; 01064 int tMaxSize = 2*qMaxSize; 01065 int lastQsection = qSize - qMaxSize/2; 01066 FILE *dynFile; 01067 int tMin, tMax, tMid; 01068 int score; 01069 int totalAli = 0; 01070 double totalSize; 01071 01072 totalSize = (double)qSize * tSize; 01073 if (totalSize < 10000000.0) 01074 { 01075 xenAlignSmall(query, qSize, target, tSize, f, forHtml); 01076 return; 01077 } 01078 makeTempName(&dynTn, "xen", ".dyn"); 01079 dynFile = mustOpen(dynTn.forCgi, "w"); 01080 01081 for (i = 0; i<lastQsection || i == 0; i += qMaxSize/2) 01082 { 01083 lqSize = qSize - i; 01084 if (lqSize > qMaxSize) 01085 lqSize = qMaxSize; 01086 caList = crudeAliFind(query+i, lqSize, &nt4, 1, 8, 12); 01087 if (forHtml) 01088 { 01089 fputc('.', stdout); 01090 fflush(stdout); 01091 } 01092 for (ca = caList; ca != NULL; ca = ca->next) 01093 { 01094 tMid = (ca->start + ca->end)/2; 01095 tMin = tMid-tMaxSize/2; 01096 tMax = tMin + tMaxSize; 01097 if (tMin < 0) 01098 tMin = 0; 01099 if (tMax > tSize) 01100 tMax = tSize; 01101 01102 fprintf(dynFile, "Aligning query:%d-%d %c to target:%d-%d +\n", 01103 i, i+lqSize, ca->strand, tMin, tMax); 01104 if (ca->strand == '-') 01105 { 01106 reverseComplement(query+i, lqSize); 01107 } 01108 score = xenAlignSmall(query+i, lqSize, target+tMin, tMax-tMin, dynFile, FALSE); 01109 if (ca->strand == '-') 01110 reverseComplement(query+i, lqSize); 01111 fprintf(dynFile, "best score %d\n", score); 01112 if (forHtml) 01113 { 01114 fputc('.', stdout); 01115 fflush(stdout); 01116 } 01117 ++totalAli; 01118 } 01119 slFreeList(&caList); 01120 } 01121 01122 freeNt4(&nt4); 01123 fclose(dynFile); 01124 if (forHtml) 01125 { 01126 printf("\n %d alignments to stitch.\n", totalAli); 01127 } 01128 xenStitcher(dynTn.forCgi, f, 150000, FALSE); 01129 remove(dynTn.forCgi); 01130 }
Here is the call graph for this function:

| int xenAlignSmall | ( | DNA * | query, | |
| int | querySize, | |||
| DNA * | target, | |||
| int | targetSize, | |||
| FILE * | f, | |||
| boolean | printExtraAtEnds | |||
| ) |
Definition at line 214 of file xensmall.c.
References c1c2MatchTable, c1Ix, c2Ix, c3Ix, c3MatchTable, calcCostBenefit(), calcGcRatio(), codingFromHiFiCost, codingFromInsertCost, codingFromLoFiCost, codingSmallGapCost, endState, errAbort(), hiFiFromCodingCost, hiFiFromInsertCost, hiFiFromLoFiCost, hiFiIx, hiFiMatchTable, hiFiSmallGapCost, insertFromCodingCost, insertFromHiFiCost, insertFromLoFiCost, largeGapExtensionCost, phmmState::lastScores, letterIx, loFiFromCodingCost, loFiFromHiFiCost, loFiFromInsertCost, loFiIx, loFiMatchTable, loFiSmallGapCost, matchState, phmmMommy::mommy, phmmMatrixNew(), phmmNameState(), phmmMatrix::qDim, qSlipIx, qSlipState, phmmMatrix::query, shortEndState, startState, phmmMatrix::stateCount, phmmMatrix::states, phmmMatrix::target, phmmMatrix::tDim, tSlipIx, tSlipState, and UBYTE.
Referenced by xenAlignBig(), and xenAlignWorm().
00218 { 00219 struct phmmMatrix *a; 00220 struct phmmState *hf, *lf, *iq, *it, *c1, *c2, *c3; 00221 int qIx, tIx, sIx; /* Query, target, and state indices */ 00222 int rowOffset, newCellOffset; 00223 struct phmmAliPair *pairList; 00224 int matchOff, qSlipOff, tSlipOff; 00225 int bestScore = -0x4fffffff; 00226 struct phmmMommy *bestCell = NULL; 00227 int c1c2PairScore, c3PairScore, loFiPairScore, hiFiPairScore; 00228 int matchTableOffset; 00229 double gcRatio; 00230 00231 /* Check that it's not too big. */ 00232 if ((double)targetSize * querySize > 1.1E7) 00233 errAbort("Can't align %d x %d, too big\n", querySize, targetSize); 00234 00235 /* Set up graph edge costs/benefits */ 00236 gcRatio = calcGcRatio(query, querySize, target, targetSize); 00237 calcCostBenefit(gcRatio); 00238 00239 /* Initialize 7 state matrix. */ 00240 a = phmmMatrixNew(7, query, querySize, target, targetSize); 00241 hf = phmmNameState(a, hiFiIx, "highFi", 'H'); 00242 lf = phmmNameState(a, loFiIx, "lowFi", 'L'); 00243 iq = phmmNameState(a, qSlipIx, "qSlip", 'Q'); 00244 it = phmmNameState(a, tSlipIx, "tSlip", 'T'); 00245 c1 = phmmNameState(a, c1Ix, "frame1", '1'); 00246 c2 = phmmNameState(a, c2Ix, "frame2", '2'); 00247 c3 = phmmNameState(a, c3Ix, "frame3", '3'); 00248 00249 qSlipOff = -a->qDim; 00250 tSlipOff = -1; 00251 matchOff = qSlipOff + tSlipOff; 00252 00253 for (tIx = 1; tIx < a->tDim; tIx += 1) 00254 { 00255 UBYTE mommy = 0; 00256 int score, tempScore; 00257 00258 /* Macros to make me less mixed up when accessing scores from row arrays.*/ 00259 #define matchScore lastScores[qIx-1] 00260 #define qSlipScore lastScores[qIx] 00261 #define tSlipScore scores[qIx-1] 00262 #define newScore scores[qIx] 00263 00264 /* Start up state block (with all ways to enter state) */ 00265 #define startState(state) \ 00266 score = 0; 00267 00268 /* Define a transition from state while advancing over both 00269 * target and query. */ 00270 #define matchState(state, addScore) \ 00271 { \ 00272 if ((tempScore = state->matchScore + addScore) > score) \ 00273 { \ 00274 mommy = phmmPackMommy(state->stateIx, -1, -1); \ 00275 score = tempScore; \ 00276 } \ 00277 } 00278 00279 /* Define a transition from state while slipping query 00280 * and advancing target. */ 00281 #define qSlipState(state, addScore) \ 00282 { \ 00283 if ((tempScore = state->qSlipScore + addScore) > score) \ 00284 { \ 00285 mommy = phmmPackMommy(state->stateIx, 0, -1); \ 00286 score = tempScore; \ 00287 } \ 00288 } 00289 00290 /* Define a transition from state while slipping target 00291 * and advancing query. */ 00292 #define tSlipState(state, addScore) \ 00293 { \ 00294 if ((tempScore = state->tSlipScore + addScore) > score) \ 00295 { \ 00296 mommy = phmmPackMommy(state->stateIx, -1, 0); \ 00297 score = tempScore; \ 00298 } \ 00299 } 00300 00301 /* End a block of transitions into state. */ 00302 #define endState(state) \ 00303 { \ 00304 struct phmmMommy *newCell = state->cells + newCellOffset; \ 00305 if (score <= 0) \ 00306 { \ 00307 mommy = phmmNullMommy; \ 00308 score = 0; \ 00309 } \ 00310 newCell->mommy = mommy; \ 00311 state->newScore = score; \ 00312 if (score > bestScore) \ 00313 { \ 00314 bestScore = score; \ 00315 bestCell = newCell; \ 00316 } \ 00317 } 00318 00319 /* End a state that you know won't produce an optimal 00320 * final score. */ 00321 #define shortEndState(state) \ 00322 { \ 00323 struct phmmMommy *newCell = state->cells + newCellOffset; \ 00324 if (score <= 0) \ 00325 { \ 00326 mommy = phmmNullMommy; \ 00327 score = 0; \ 00328 } \ 00329 newCell->mommy = mommy; \ 00330 state->newScore = score; \ 00331 } 00332 00333 rowOffset = tIx*a->qDim; 00334 for (qIx = 1; qIx < a->qDim; qIx += 1) 00335 { 00336 int qBase = letterIx(a->query[qIx-1]); 00337 int tBase = letterIx(a->target[tIx-1]); 00338 00339 newCellOffset = rowOffset + qIx; 00340 00341 /* Figure the cost or bonus for pairing target and query residue here. */ 00342 matchTableOffset = (qBase<<5) + tBase; 00343 c1c2PairScore = c1c2MatchTable[matchTableOffset]; 00344 c3PairScore = c3MatchTable[matchTableOffset]; 00345 hiFiPairScore = hiFiMatchTable[matchTableOffset]; 00346 loFiPairScore = loFiMatchTable[matchTableOffset]; 00347 00348 /* Update hiFi space. */ 00349 { 00350 startState(hf); 00351 matchState(hf, hiFiPairScore); 00352 qSlipState(hf, hiFiSmallGapCost); 00353 tSlipState(hf, hiFiSmallGapCost); 00354 matchState(iq, hiFiPairScore + hiFiFromInsertCost); 00355 matchState(it, hiFiPairScore + hiFiFromInsertCost); 00356 matchState(lf, hiFiPairScore + hiFiFromLoFiCost); 00357 matchState(c1, hiFiPairScore + hiFiFromCodingCost); 00358 matchState(c2, hiFiPairScore + hiFiFromCodingCost); 00359 matchState(c3, hiFiPairScore + hiFiFromCodingCost); 00360 endState(hf); 00361 } 00362 00363 /* Update loFi space. */ 00364 { 00365 startState(lf); 00366 matchState(lf, loFiPairScore); 00367 qSlipState(lf, loFiSmallGapCost); 00368 tSlipState(lf, loFiSmallGapCost); 00369 matchState(iq, loFiPairScore + loFiFromInsertCost); 00370 matchState(it, loFiPairScore + loFiFromInsertCost); 00371 matchState(hf, loFiPairScore + loFiFromHiFiCost); 00372 matchState(c1, loFiPairScore + loFiFromCodingCost); 00373 matchState(c2, loFiPairScore + loFiFromCodingCost); 00374 matchState(c3, loFiPairScore + loFiFromCodingCost); 00375 endState(lf); 00376 } 00377 00378 /* Update query slip space. */ 00379 { 00380 startState(iq); 00381 qSlipState(iq, largeGapExtensionCost); 00382 qSlipState(hf, insertFromHiFiCost); 00383 qSlipState(lf, insertFromLoFiCost); 00384 qSlipState(c1, insertFromCodingCost); 00385 qSlipState(c2, insertFromCodingCost); 00386 qSlipState(c3, insertFromCodingCost); 00387 qSlipState(it, largeGapExtensionCost); /* Allow double gaps, T first always. */ 00388 shortEndState(iq); 00389 } 00390 00391 /* Update target slip space. */ 00392 { 00393 startState(it); 00394 tSlipState(it, largeGapExtensionCost); 00395 tSlipState(hf, insertFromHiFiCost); 00396 tSlipState(lf, insertFromLoFiCost); 00397 tSlipState(c1, insertFromCodingCost); 00398 tSlipState(c2, insertFromCodingCost); 00399 tSlipState(c3, insertFromCodingCost); 00400 shortEndState(it); 00401 } 00402 00403 /* Update coding1 space. */ 00404 { 00405 startState(c1); 00406 matchState(c3, c1c2PairScore); 00407 qSlipState(c1, codingSmallGapCost); 00408 tSlipState(c1, codingSmallGapCost); 00409 matchState(iq, c1c2PairScore + codingFromInsertCost); 00410 matchState(it, c1c2PairScore + codingFromInsertCost); 00411 matchState(lf, c1c2PairScore + codingFromLoFiCost); 00412 matchState(hf, c1c2PairScore + codingFromHiFiCost); 00413 endState(c1); 00414 } 00415 00416 /* Update coding2 space. */ 00417 { 00418 startState(c2); 00419 matchState(c1, c1c2PairScore); 00420 qSlipState(c2, codingSmallGapCost); 00421 tSlipState(c2, codingSmallGapCost); 00422 matchState(iq, c1c2PairScore + codingFromInsertCost); 00423 matchState(it, c1c2PairScore + codingFromInsertCost); 00424 matchState(lf, c1c2PairScore + codingFromLoFiCost); 00425 matchState(hf, c1c2PairScore + codingFromHiFiCost); 00426 endState(c2); 00427 } 00428 00429 00430 /* Update coding3 space. */ 00431 { 00432 startState(c3); 00433 matchState(c2, c3PairScore); 00434 qSlipState(c3, codingSmallGapCost); 00435 tSlipState(c3, codingSmallGapCost); 00436 matchState(iq, c3PairScore + codingFromInsertCost); 00437 matchState(it, c3PairScore + codingFromInsertCost); 00438 matchState(lf, c3PairScore + codingFromLoFiCost); 00439 matchState(hf, c3PairScore + codingFromHiFiCost); 00440 endState(c3); 00441 } 00442 } 00443 /* Swap score columns so current becomes last, and last gets 00444 * reused. */ 00445 for (sIx = 0; sIx < a->stateCount; ++sIx) 00446 { 00447 struct phmmState *as = &a->states[sIx]; 00448 int *swapTemp = as->lastScores; 00449 as->lastScores = as->scores; 00450 as->scores = swapTemp; 00451 } 00452 } 00453 00454 /* Trace back from best scoring cell. */ 00455 pairList = phmmTraceBack(a, bestCell); 00456 phmmPrintTrace(a, pairList, TRUE, f, printExtraAtEnds); 00457 00458 slFreeList(&pairList); 00459 phmmMatrixFree(&a); 00460 return bestScore; 00461 #undef matchScore 00462 #undef qSlipScore 00463 #undef tSlipScore 00464 #undef newScore 00465 #undef startState 00466 #undef matchState 00467 #undef qSlipState 00468 #undef tSlipState 00469 #undef shortEndState 00470 #undef endState 00471 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void xenAlignWorm | ( | DNA * | query, | |
| int | qSize, | |||
| FILE * | f, | |||
| boolean | forHtml | |||
| ) |
Definition at line 1133 of file xenbig.c.
References chromNames, crudeAliFind(), FALSE, tempName::forCgi, freez(), makeTempName(), mustOpen(), crudeAli::next, reverseComplement(), slCat(), slCount(), slFreeList(), wormChromNames(), wormChromPart(), wormClipRangeToChrom(), wormFreeNt4Genome(), wormLoadNt4Genome(), xenAlignSmall(), and xenStitcher().
01135 { 01136 struct crudeAli *caList = NULL, *ca, *newCa; 01137 struct tempName dynTn; 01138 int i; 01139 int lqSize; 01140 int qMaxSize = 2000; 01141 int tMaxSize = 2*qMaxSize; 01142 int lastQsection = qSize - qMaxSize/2; 01143 FILE *dynFile; 01144 int tMin, tMax, tMid, tSize; 01145 int score; 01146 int chromCount; 01147 char **chromNames; 01148 struct nt4Seq **chrom; 01149 DNA *target; 01150 int sectionCount = 0; 01151 01152 /* Get C. elegans genome and do crude alignments. */ 01153 wormLoadNt4Genome(&chrom, &chromCount); 01154 wormChromNames(&chromNames, &chromCount); 01155 for (i = 0; i<lastQsection || i == 0; i += qMaxSize/2) 01156 { 01157 if (forHtml) 01158 { 01159 fputc('.', stdout); 01160 fflush(stdout); 01161 } 01162 lqSize = qSize - i; 01163 if (lqSize > qMaxSize) 01164 lqSize = qMaxSize; 01165 newCa = crudeAliFind(query+i, lqSize, chrom, chromCount, 8, 45); 01166 for (ca = newCa; ca != NULL; ca = ca->next) 01167 { 01168 ca->qStart = i; 01169 ca->qEnd = i+lqSize; 01170 } 01171 caList = slCat(caList,newCa); 01172 ++sectionCount; 01173 } 01174 wormFreeNt4Genome(&chrom); 01175 01176 /* Make temporary output file. */ 01177 makeTempName(&dynTn, "xen", ".dyn"); 01178 dynFile = mustOpen(dynTn.forCgi, "w"); 01179 01180 /* Do dynamic programming alignment on each crude alignment. */ 01181 if (forHtml) 01182 { 01183 int crudeCount = slCount(caList); 01184 if (crudeCount > 2*sectionCount) 01185 { 01186 printf("\nThis is a difficult alignment. It looks like the query aligns with\n" 01187 "the <I>C. elegans</I> genome in %d places. It will take about %1.0f more\n" 01188 "minutes to finish.\n", (crudeCount+sectionCount/2)/sectionCount, crudeCount*0.5); 01189 } 01190 fflush(stdout); 01191 } 01192 for (ca = caList; ca != NULL; ca = ca->next) 01193 { 01194 if (forHtml) 01195 { 01196 fputc('.', stdout); 01197 fflush(stdout); 01198 } 01199 tMid = (ca->start + ca->end)/2; 01200 tMin = tMid-tMaxSize/2; 01201 tMax = tMin + tMaxSize; 01202 wormClipRangeToChrom(chromNames[ca->chromIx], &tMin, &tMax); 01203 fprintf(dynFile, "Aligning query:%d-%d %c to %s:%d-%d +\n", 01204 ca->qStart, ca->qEnd, ca->strand, chromNames[ca->chromIx], tMin, tMax); 01205 lqSize = ca->qEnd - ca->qStart; 01206 if (ca->strand == '-') 01207 reverseComplement(query+ca->qStart, lqSize); 01208 tSize = tMax - tMin; 01209 target = wormChromPart(chromNames[ca->chromIx], tMin, tSize); 01210 score = xenAlignSmall(query+ca->qStart, lqSize, target, tSize, dynFile, FALSE); 01211 freez(&target); 01212 if (ca->strand == '-') 01213 reverseComplement(query+ca->qStart, lqSize); 01214 fprintf(dynFile, "best score %d\n", score); 01215 } 01216 slFreeList(&caList); 01217 fclose(dynFile); 01218 01219 if (forHtml) 01220 printf("\n"); 01221 xenStitcher(dynTn.forCgi, f, 150000, FALSE); 01222 remove(dynTn.forCgi); 01223 }
Here is the call graph for this function:

| void xenShowAli | ( | char * | qSym, | |
| char * | tSym, | |||
| char * | hSym, | |||
| int | symCount, | |||
| FILE * | f, | |||
| int | qOffset, | |||
| int | tOffset, | |||
| char | qStrand, | |||
| char | tStrand, | |||
| int | maxLineSize | |||
| ) |
Definition at line 34 of file xenshow.c.
References mustWrite(), nonDashCount(), and printMatchers().
00037 { 00038 int i; 00039 int lineSize; 00040 int count; 00041 00042 for (i=0; i<symCount; i += lineSize) 00043 { 00044 lineSize = symCount - i; 00045 if (lineSize > maxLineSize) lineSize = maxLineSize; 00046 mustWrite(f, qSym+i, lineSize); 00047 count = nonDashCount(qSym+i, lineSize); 00048 if (qStrand == '-') 00049 count = -count; 00050 qOffset += count; 00051 fprintf(f, " %9d\n", qOffset); 00052 printMatchers(qSym+i, tSym+i, lineSize, f); 00053 fputc('\n', f); 00054 mustWrite(f, tSym+i, lineSize); 00055 count = nonDashCount(tSym+i, lineSize); 00056 if (tStrand == '-') 00057 count = -count; 00058 tOffset += count; 00059 fprintf(f, " %9d\n", tOffset); 00060 mustWrite(f, hSym+i, lineSize); 00061 fputc('\n', f); 00062 fputc('\n', f); 00063 } 00064 }
Here is the call graph for this function:

| void xenStitch | ( | char * | inName, | |
| FILE * | out, | |||
| int | stitchMinScore, | |||
| boolean | compactOutput | |||
| ) |
| void xenStitcher | ( | char * | inName, | |
| FILE * | out, | |||
| int | stitchMinScore, | |||
| boolean | compactOutput | |||
| ) |
Definition at line 1047 of file xenbig.c.
References FALSE, and xStitch().
Referenced by xenAlignBig(), and xenAlignWorm().
Here is the call graph for this function:

Here is the caller graph for this function:

1.5.2