00001
00002 #include "common.h"
00003 #include "linefile.h"
00004 #include "hash.h"
00005 #include "axt.h"
00006 #include "obscure.h"
00007 #include "genoFind.h"
00008
00009 static char const rcsid[] = "$Id: blastOut.c,v 1.26 2006/03/10 17:43:36 angie Exp $";
00010
00011 struct axtRef
00012
00013 {
00014 struct axtRef *next;
00015 struct axt *axt;
00016 };
00017
00018 static int axtRefCmpScore(const void *va, const void *vb)
00019
00020 {
00021 const struct axtRef *a = *((struct axtRef **)va);
00022 const struct axtRef *b = *((struct axtRef **)vb);
00023 return b->axt->score - a->axt->score;
00024 }
00025
00026 struct targetHits
00027
00028 {
00029 struct targetHits *next;
00030 char *name;
00031 int size;
00032 struct axtRef *axtList;
00033 int score;
00034 };
00035
00036 static void targetHitsFree(struct targetHits **pObj)
00037
00038 {
00039 struct targetHits *obj = *pObj;
00040 if (obj != NULL)
00041 {
00042 freeMem(obj->name);
00043 slFreeList(&obj->axtList);
00044 freez(pObj);
00045 }
00046 }
00047
00048 static void targetHitsFreeList(struct targetHits **pList)
00049
00050 {
00051 struct targetHits *el, *next;
00052
00053 for (el = *pList; el != NULL; el = next)
00054 {
00055 next = el->next;
00056 targetHitsFree(&el);
00057 }
00058 *pList = NULL;
00059 }
00060
00061
00062 static int targetHitsCmpScore(const void *va, const void *vb)
00063
00064 {
00065 const struct targetHits *a = *((struct targetHits **)va);
00066 const struct targetHits *b = *((struct targetHits **)vb);
00067
00068 return b->score - a->score;
00069 }
00070
00071 static int blastzToWublastScore(int bzScore)
00072
00073
00074 {
00075 return bzScore/19;
00076 }
00077
00078 static double blastzScoreToWuBits(int bzScore, boolean isProt)
00079
00080
00081
00082
00083 {
00084 int wuScore = blastzToWublastScore(bzScore);
00085 if (isProt)
00086 return wuScore * 0.3545;
00087 else
00088 return wuScore * 0.1553;
00089 }
00090
00091 static double blastzScoreToWuExpectation(int bzScore, double databaseSize)
00092
00093
00094
00095
00096 {
00097 double logProbOne = -log(2) * bzScore / 32.1948;
00098 return databaseSize * exp(logProbOne);
00099 }
00100
00101 static int blastzScoreToNcbiBits(int bzScore)
00102
00103 {
00104 return round(bzScore * 0.0205);
00105 }
00106
00107 static int blastzScoreToNcbiScore(int bzScore)
00108
00109 {
00110 return round(bzScore * 0.0529);
00111 }
00112
00113 static double blastzScoreToNcbiExpectation(int bzScore)
00114
00115 {
00116 double bits = bzScore * 0.0205;
00117 double logProb = -bits*log(2);
00118 return 3.0e9 * exp(logProb);
00119 }
00120
00121 static double expectationToProbability(double e)
00122
00123
00124
00125 {
00126 if (e < 0.999)
00127 return e;
00128 else
00129 return 0.999;
00130 }
00131
00132 static int countMatches(char *a, char *b, int size)
00133
00134 {
00135 int count = 0;
00136 int i;
00137 for (i=0; i<size; ++i)
00138 if (toupper(a[i]) == toupper(b[i]))
00139 ++count;
00140 return count;
00141 }
00142
00143 static int countGaps(char *a, char *b, int size)
00144
00145 {
00146 int count = 0;
00147 int i;
00148 for (i=0; i<size; ++i)
00149 {
00150 if (a[i] == '-')
00151 ++count;
00152 if (b[i] == '-')
00153 ++count;
00154 }
00155 return count;
00156 }
00157
00158 static int countGapOpens(char *a, char *b, int size)
00159
00160 {
00161 int i, count = 0;
00162 boolean inGap = FALSE;
00163 for (i=0; i<size; ++i)
00164 {
00165 if (a[i] == '-' || b[i] == '-')
00166 {
00167 if (!inGap)
00168 {
00169 ++count;
00170 inGap = TRUE;
00171 }
00172 }
00173 else
00174 {
00175 inGap = FALSE;
00176 }
00177 }
00178 return count;
00179 }
00180
00181
00182 static int countPositives(char *a, char *b, int size)
00183
00184 {
00185 int count = 0;
00186 int i;
00187 struct axtScoreScheme *ss = axtScoreSchemeProteinDefault();
00188 for (i=0; i<size; ++i)
00189 {
00190 if (ss->matrix[(int)a[i]][(int)b[i]] > 0)
00191 ++count;
00192 }
00193 return count;
00194 }
00195
00196 static int plusStrandPos(int pos, int size, char strand, boolean isEnd)
00197
00198 {
00199 if (strand == '-')
00200 {
00201 pos = size - pos;
00202 if (isEnd)
00203 ++pos;
00204 }
00205 else
00206 {
00207 if (!isEnd)
00208 ++pos;
00209 }
00210 return pos;
00211 }
00212
00213 static int calcDigitCount(struct axt *axt, int tSize, int qSize)
00214
00215 {
00216 int tDig, qDig;
00217 if (axt->qStrand == '-')
00218 qDig = digitsBaseTen(qSize - axt->qStart + 1);
00219 else
00220 qDig = digitsBaseTen(axt->qEnd);
00221 if (axt->tStrand == '-')
00222 tDig = digitsBaseTen(tSize - axt->tStart + 1);
00223 else
00224 tDig = digitsBaseTen(axt->tEnd);
00225 return max(tDig, qDig);
00226 }
00227
00228 static void blastiodAxtOutput(FILE *f, struct axt *axt, int tSize, int qSize,
00229 int lineSize, boolean isProt, boolean isTranslated)
00230
00231 {
00232 int tOff = axt->tStart;
00233 int dt = (isTranslated ? 3 : 1);
00234 int qOff = axt->qStart;
00235 int lineStart, lineEnd, i;
00236 int digits = calcDigitCount(axt, tSize, qSize);
00237 struct axtScoreScheme *ss = NULL;
00238
00239 if (isProt)
00240 ss = axtScoreSchemeProteinDefault();
00241 for (lineStart = 0; lineStart < axt->symCount; lineStart = lineEnd)
00242 {
00243 lineEnd = lineStart + lineSize;
00244 if (lineEnd > axt->symCount) lineEnd = axt->symCount;
00245 fprintf(f, "Query: %-*d ", digits, plusStrandPos(qOff, qSize, axt->qStrand, FALSE));
00246 for (i=lineStart; i<lineEnd; ++i)
00247 {
00248 char c = axt->qSym[i];
00249 fputc(c, f);
00250 if (c != '-')
00251 ++qOff;
00252 }
00253 fprintf(f, " %-d\n", plusStrandPos(qOff, qSize, axt->qStrand, TRUE));
00254 fprintf(f, " %*s ", digits, " ");
00255 for (i=lineStart; i<lineEnd; ++i)
00256 {
00257 char q, t, c;
00258 q = axt->qSym[i];
00259 t = axt->tSym[i];
00260 if (isProt)
00261 {
00262 if (q == t)
00263 c = q;
00264 else if (ss->matrix[(int)q][(int)t] > 0)
00265 c = '+';
00266 else
00267 c = ' ';
00268 }
00269 else
00270 c = ((toupper(q) == toupper(t)) ? '|' : ' ');
00271
00272 fputc(c, f);
00273 }
00274 fprintf(f, "\n");
00275 fprintf(f, "Sbjct: %-*d ", digits, plusStrandPos(tOff, tSize, axt->tStrand, FALSE));
00276 for (i=lineStart; i<lineEnd; ++i)
00277 {
00278 char c = axt->tSym[i];
00279 fputc(c, f);
00280 if (c != '-')
00281 tOff += dt;
00282 }
00283 fprintf(f, " %-d\n", plusStrandPos(tOff, tSize, axt->tStrand, TRUE));
00284 fprintf(f, "\n");
00285 }
00286 }
00287
00288 static struct targetHits *bundleIntoTargets(struct axtBundle *abList)
00289
00290
00291 {
00292 struct targetHits *targetList = NULL, *target;
00293 struct hash *targetHash = newHash(10);
00294 struct axtBundle *ab;
00295 struct axtRef *ref;
00296
00297
00298
00299 for (ab = abList; ab != NULL; ab = ab->next)
00300 {
00301 struct axt *axt;
00302 for (axt = ab->axtList; axt != NULL; axt = axt->next)
00303 {
00304 target = hashFindVal(targetHash, axt->tName);
00305 if (target == NULL)
00306 {
00307 AllocVar(target);
00308 slAddHead(&targetList, target);
00309 hashAdd(targetHash, axt->tName, target);
00310 target->name = cloneString(axt->tName);
00311 target->size = ab->tSize;
00312 }
00313 if (axt->score > target->score)
00314 target->score = axt->score;
00315 AllocVar(ref);
00316 ref->axt = axt;
00317 slAddHead(&target->axtList, ref);
00318 }
00319 }
00320 slSort(&targetList, targetHitsCmpScore);
00321 for (target = targetList; target != NULL; target = target->next)
00322 slSort(&target->axtList, axtRefCmpScore);
00323
00324 hashFree(&targetHash);
00325 return targetList;
00326 }
00327
00328 static char *nameForStrand(char strand)
00329
00330 {
00331 if (strand == '-')
00332 return "Minus";
00333 else
00334 return "Plus";
00335 }
00336
00337 static char *progType(boolean isProt, struct axtBundle *ab, boolean isUpper)
00338
00339 {
00340 if (!isProt)
00341 return isUpper ? "BLASTN" : "blastn";
00342 else
00343 {
00344 if (ab->axtList->frame != 0)
00345 return isUpper ? "TBLASTN" : "tblastn";
00346 else
00347 return isUpper ? "BLASTP" : "blastp";
00348 }
00349 }
00350
00351 static void wuBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
00352 FILE *f,
00353 char *databaseName, int databaseSeqCount, double databaseLetterCount,
00354 char *ourId)
00355
00356 {
00357 char asciiNum[32];
00358 struct targetHits *targetList = NULL, *target;
00359 char *queryName;
00360 int isRc;
00361 int querySize = abList->qSize;
00362 boolean isTranslated = (abList->axtList->frame != 0);
00363
00364
00365 if (ourId == NULL)
00366 ourId = "axtBlastOut";
00367 fprintf(f, "%s 2.0MP-WashU [%s]\n", progType(isProt, abList, TRUE), ourId);
00368 fprintf(f, "\n");
00369 fprintf(f, "Copyright (C) 2000-2002 Jim Kent\n");
00370 fprintf(f, "All Rights Reserved\n");
00371 fprintf(f, "\n");
00372 fprintf(f, "Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
00373 fprintf(f, "\n");
00374 if (!isProt)
00375 {
00376 fprintf(f, "Notice: this program and its default parameter settings are optimized to find\n");
00377 fprintf(f, "nearly identical sequences very rapidly. For slower but more sensitive\n");
00378 fprintf(f, "alignments please use other methods.\n");
00379 fprintf(f, "\n");
00380 }
00381
00382
00383 queryName = abList->axtList->qName;
00384 fprintf(f, "Query= %s\n", queryName);
00385 fprintf(f, " (%d letters; record %d)\n", abList->qSize, queryIx);
00386 fprintf(f, "\n");
00387 fprintf(f, "Database: %s\n", databaseName);
00388 sprintLongWithCommas(asciiNum, databaseLetterCount);
00389 fprintf(f, " %d sequences; %s total letters\n", databaseSeqCount, asciiNum);
00390 fprintf(f, "Searching....10....20....30....40....50....60....70....80....90....100%% done\n");
00391 fprintf(f, "\n");
00392
00393 targetList = bundleIntoTargets(abList);
00394
00395
00396 fprintf(f, " Smallest\n");
00397 fprintf(f, " Sum\n");
00398 fprintf(f, " High Probability\n");
00399 fprintf(f, "Sequences producing High-scoring Segment Pairs: Score P(N) N\n");
00400 fprintf(f, "\n");
00401 for (target = targetList; target != NULL; target = target->next)
00402 {
00403 double expectation = blastzScoreToWuExpectation(target->score, databaseLetterCount);
00404 double p = expectationToProbability(expectation);
00405 fprintf(f, "%-61s %4d %8.1e %2d\n", target->name,
00406 blastzToWublastScore(target->score), p, slCount(target->axtList));
00407 }
00408
00409
00410 for (target = targetList; target != NULL; target = target->next)
00411 {
00412 fprintf(f, "\n\n>%s\n", target->name);
00413 fprintf(f, " Length = %d\n", target->size);
00414 fprintf(f, "\n");
00415 for (isRc=0; isRc <= 1; ++isRc)
00416 {
00417 boolean saidStrand = FALSE;
00418 char strand = (isRc ? '-' : '+');
00419 char *strandName = nameForStrand(strand);
00420 struct axtRef *ref;
00421 struct axt *axt;
00422 for (ref = target->axtList; ref != NULL; ref = ref->next)
00423 {
00424 axt = ref->axt;
00425 if (axt->qStrand == strand)
00426 {
00427 int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
00428 int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
00429 if (!saidStrand)
00430 {
00431 saidStrand = TRUE;
00432 if (!isProt)
00433 fprintf(f, " %s Strand HSPs:\n\n", strandName);
00434 }
00435 fprintf(f, " Score = %d (%2.1f bits), Expect = %5.1e, P = %5.1e\n",
00436 blastzToWublastScore(axt->score),
00437 blastzScoreToWuBits(axt->score, isProt),
00438 blastzScoreToWuExpectation(axt->score, databaseLetterCount),
00439 blastzScoreToWuExpectation(axt->score, databaseLetterCount));
00440 fprintf(f, " Identities = %d/%d (%d%%), Positives = %d/%d (%d%%)",
00441 matches, axt->symCount, round(100.0 * matches / axt->symCount),
00442 positives, axt->symCount, round(100.0 * positives / axt->symCount));
00443 if (isProt)
00444 {
00445 if (axt->frame != 0)
00446 fprintf(f, ", Frame = %c%d", axt->tStrand, axt->frame);
00447 fprintf(f, "\n");
00448 }
00449 else
00450 fprintf(f, ", Strand = %s / Plus\n", strandName);
00451 fprintf(f, "\n");
00452 blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
00453 }
00454 }
00455 }
00456 }
00457
00458
00459 targetHitsFreeList(&targetList);
00460 }
00461
00462 static void ncbiPrintE(FILE *f, double e)
00463
00464 {
00465 if (e <= 0.0)
00466 fprintf(f, "0.0");
00467 else if (e <= 1.0e-100)
00468 {
00469 char buf[256], *s;
00470 safef(buf, sizeof(buf), "%e", e);
00471 s = strchr(buf, 'e');
00472 if (s == NULL)
00473 fprintf(f, "0.0");
00474 else
00475 fprintf(f, "%s", s);
00476 }
00477 else
00478 fprintf(f, "%1.0e", e);
00479 }
00480
00481
00482 int answer_for_kg;
00483 static void ncbiBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
00484 FILE *f, char *databaseName, int databaseSeqCount,
00485 double databaseLetterCount, char *ourId, double minIdentity)
00486
00487 {
00488 char asciiNum[32];
00489 struct targetHits *targetList = NULL, *target;
00490 char *queryName;
00491 int querySize = abList->qSize;
00492 boolean isTranslated = (abList->axtList->frame != 0);
00493
00494
00495 if (ourId == NULL)
00496 ourId = "axtBlastOut";
00497 fprintf(f, "%s 2.2.11 [%s]\n", progType(isProt, abList, TRUE), ourId);
00498 fprintf(f, "\n");
00499 fprintf(f, "Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
00500 fprintf(f, "\n");
00501
00502
00503 queryName = abList->axtList->qName;
00504 fprintf(f, "Query= %s\n", queryName);
00505 fprintf(f, " (%d letters)\n", abList->qSize);
00506 fprintf(f, "\n");
00507 fprintf(f, "Database: %s \n", databaseName);
00508 sprintLongWithCommas(asciiNum, databaseLetterCount);
00509 fprintf(f, " %d sequences; %s total letters\n", databaseSeqCount, asciiNum);
00510 fprintf(f, "\n");
00511 fprintf(f, "Searching.done\n");
00512
00513 targetList = bundleIntoTargets(abList);
00514
00515
00516 fprintf(f, " Score E\n");
00517 fprintf(f, "Sequences producing significant alignments: (bits) Value\n");
00518 fprintf(f, "\n");
00519 for (target = targetList; target != NULL; target = target->next)
00520 {
00521 struct axtRef *ref;
00522 struct axt *axt;
00523 int matches;
00524 double identity, expectation;
00525 int bit;
00526
00527 for (ref = target->axtList; ref != NULL; ref = ref->next)
00528 {
00529 axt = ref->axt;
00530
00531 matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
00532 identity = round(100.0 * matches / axt->symCount);
00533
00534 if (identity < minIdentity) continue;
00535
00536 bit = blastzScoreToNcbiBits(axt->score);
00537 expectation = blastzScoreToNcbiExpectation(axt->score);
00538 fprintf(f, "%-67s %4d ", target->name, bit);
00539 ncbiPrintE(f, expectation);
00540 fprintf(f, "\n");
00541 }
00542 }
00543 fprintf(f, "\n");
00544
00545
00546 for (target = targetList; target != NULL; target = target->next)
00547 {
00548 struct axtRef *ref;
00549 struct axt *axt;
00550 int matches, gaps;
00551 char *oldName;
00552
00553 int ii = 0;
00554 double identity;
00555 oldName = strdup("");
00556
00557 for (ref = target->axtList; ref != NULL; ref = ref->next)
00558 {
00559 ii++;
00560 axt = ref->axt;
00561
00562 matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
00563 identity = round(100.0 * matches / axt->symCount);
00564
00565
00566 if (identity < minIdentity) continue;
00567
00568
00569 if (!sameWord(oldName, target->name))
00570 {
00571 fprintf(f, "\n\n>%s \n", target->name);
00572 fprintf(f, " Length = %d\n", target->size);
00573 oldName = strdup(target->name);
00574 }
00575
00576 fprintf(f, "\n");
00577 fprintf(f, " Score = %d bits (%d), Expect = ",
00578 blastzScoreToNcbiBits(axt->score),
00579 blastzScoreToNcbiScore(axt->score));
00580 ncbiPrintE(f, blastzScoreToNcbiExpectation(axt->score));
00581 fprintf(f, "\n");
00582
00583 if (isProt)
00584 {
00585 int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
00586 gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
00587 fprintf(f, " Identities = %d/%d (%d%%),",
00588 matches, axt->symCount, round(100.0 * matches / axt->symCount));
00589 fprintf(f, " Positives = %d/%d (%d%%),",
00590 positives, axt->symCount, round(100.0 * positives / axt->symCount));
00591 fprintf(f, " Gaps = %d/%d (%d%%)\n",
00592 gaps, axt->symCount, round(100.0 * gaps / axt->symCount));
00593 if (axt->frame != 0)
00594 fprintf(f, " Frame = %c%d\n", axt->tStrand, axt->frame);
00595
00596
00597 answer_for_kg=axt->symCount - matches;
00598 }
00599 else
00600 {
00601 fprintf(f, " Identities = %d/%d (%d%%)\n",
00602 matches, axt->symCount, round(100.0 * matches / axt->symCount));
00603 fprintf(f, " Strand = %s / %s\n", nameForStrand(axt->qStrand),
00604 nameForStrand(axt->tStrand));
00605 }
00606 fprintf(f, "\n");
00607 blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
00608 }
00609 }
00610
00611 fprintf(f, " Database: %s\n", databaseName);
00612
00613
00614 targetHitsFreeList(&targetList);
00615 }
00616
00617 static void xmlBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
00618 FILE *f, char *databaseName, int databaseSeqCount,
00619 double databaseLetterCount, char *ourId)
00620
00621
00622
00623 {
00624 char *queryName = abList->axtList->qName;
00625 int querySize = abList->qSize;
00626 int hitNum = 0;
00627 struct targetHits *targetList = NULL, *target;
00628
00629 if (ourId == NULL)
00630 ourId = "axtBlastOut";
00631 fprintf(f, "<?xml version=\"1.0\"?>\n");
00632 fprintf(f, "<!DOCTYPE BlastOutput PUBLIC \"-//NCBI//NCBI BlastOutput/EN\" \"NCBI_BlastOutput.dtd\">\n");
00633 fprintf(f, "<BlastOutput>\n");
00634 fprintf(f, " <BlastOutput_program>%s</BlastOutput_program>\n",
00635 progType(isProt, abList, FALSE));
00636 fprintf(f, " <BlastOutput_version>%s 2.2.4 [%s]</BlastOutput_version>\n",
00637 progType(isProt, abList, FALSE), ourId);
00638 fprintf(f, " <BlastOutput_reference>~Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool</BlastOutput_reference>\n");
00639 fprintf(f, " <BlastOutput_db>%s</BlastOutput_db>\n", databaseName);
00640 fprintf(f, " <BlastOutput_query-ID>%d</BlastOutput_query-ID>\n", queryIx);
00641 fprintf(f, " <BlastOutput_query-def>%s</BlastOutput_query-def>\n", queryName);
00642 fprintf(f, " <BlastOutput_query-len>%d</BlastOutput_query-len>\n", querySize);
00643
00644 if (isProt)
00645 {
00646 fprintf(f, " <BlastOutput_param>\n");
00647 fprintf(f, " <Parameters_matrix>BLOSUM62</Parameters_matrix>\n");
00648 fprintf(f, " <Parameters_expect>0.001</Parameters_expect>\n");
00649 fprintf(f, " <Parameters_expect>10</Parameters_expect>\n");
00650 fprintf(f, " <Parameters_gap-extend>1</Parameters_gap-extend>\n");
00651 fprintf(f, " </BlastOutput_param>\n");
00652 }
00653
00654 fprintf(f, " <BlastOutput_iterations>\n");
00655 fprintf(f, " <Iteration>\n");
00656 fprintf(f, " <Iteration_iter-num>1</Iteration_iter-num>\n");
00657 fprintf(f, " <Iteration_hits>\n");
00658
00659
00660 targetList = bundleIntoTargets(abList);
00661 for (target = targetList; target != NULL; target = target->next)
00662 {
00663 struct axtRef *ref;
00664 fprintf(f, " <hit>\n");
00665 fprintf(f, " <Hit_num>%d</Hit_num>\n", hitNum);
00666 fprintf(f, " <Hit_id>%s</Hit_id>\n", target->name);
00667 fprintf(f, " <Hit_accession>%s</Hit_accession>\n", target->name);
00668 fprintf(f, " <Hit_len>%d</Hit_len>\n", target->size);
00669 fprintf(f, " <Hit_hsps>\n");
00670 for (ref = target->axtList; ref != NULL; ref = ref->next)
00671 {
00672 struct axt *axt = ref->axt;
00673 int hspIx = 0;
00674 fprintf(f, " <Hsp>\n");
00675 fprintf(f, " <Hsp_num>%d</Hsp_num>\n", ++hspIx);
00676 fprintf(f, " <Hsp_bit-score>%d</Hsp_bit-score>\n",
00677 blastzScoreToNcbiBits(axt->score));
00678 fprintf(f, " <Hsp_score>%d</Hsp_score>\n",
00679 blastzScoreToNcbiScore(axt->score));
00680 fprintf(f, " <Hsp_evalue>%f</Hsp_evalue>\n",
00681 blastzScoreToNcbiExpectation(axt->score));
00682 fprintf(f, " <Hsp_query-from>%d</Hsp_query-from>\n",
00683 axt->qStart+1);
00684 fprintf(f, " <Hsp_query-to>%d</Hsp_query-to>\n", axt->qEnd);
00685 fprintf(f, " <Hsp_hit-from>%d</Hsp_hit-from>\n",
00686 axt->tStart+1);
00687 fprintf(f, " <Hsp_hit-to>%d</Hsp_hit-to>\n", axt->tEnd);
00688 fprintf(f, " </Hsp>\n");
00689 }
00690
00691 fprintf(f, " </Hit_hsps>\n");
00692 }
00693
00694 fprintf(f, " </Iteration_hits>\n");
00695 fprintf(f, " </Iteration>\n");
00696 fprintf(f, " </BlastOutput_iterations>\n");
00697 fprintf(f, "</BlastOutput>\n");
00698 }
00699
00700 static void printAxtTargetBlastTab(FILE *f, struct axt *axt, int targetSize)
00701
00702 {
00703 int s = axt->tStart, e = axt->tEnd;
00704 if (axt->tStrand == '-')
00705 reverseIntRange(&s, &e, targetSize);
00706 if (axt->tStrand == axt->qStrand)
00707 {
00708 fprintf(f, "%d\t", s+1);
00709 fprintf(f, "%d\t", e);
00710 }
00711 else
00712 {
00713 fprintf(f, "%d\t", e);
00714 fprintf(f, "%d\t", s+1);
00715 }
00716 }
00717
00718 static void tabBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
00719 FILE *f, char *databaseName, int databaseSeqCount,
00720 double databaseLetterCount, char *ourId, boolean withComment)
00721
00722 {
00723 char *queryName = abList->axtList->qName;
00724 int querySize = abList->qSize;
00725 struct targetHits *targetList = NULL, *target;
00726
00727 if (withComment)
00728 {
00729 char * rcsDate = "$Date: 2006/03/10 17:43:36 $";
00730 char dateStamp[11];
00731 strncpy (dateStamp, rcsDate+7, 10);
00732 dateStamp[10] = 0;
00733 fprintf(f, "# BLAT %s [%s]\n", gfVersion, dateStamp);
00734 fprintf(f, "# Query: %s\n", queryName);
00735 fprintf(f, "# Database: %s\n", databaseName);
00736 fprintf(f, "%s\n",
00737 "# Fields: Query id, Subject id, % identity, alignment length, "
00738 "mismatches, gap openings, q. start, q. end, s. start, s. end, "
00739 "e-value, bit score");
00740 }
00741
00742
00743 targetList = bundleIntoTargets(abList);
00744 for (target = targetList; target != NULL; target = target->next)
00745 {
00746 struct axtRef *ref;
00747 for (ref = target->axtList; ref != NULL; ref = ref->next)
00748 {
00749 struct axt *axt = ref->axt;
00750 int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
00751 int gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
00752 int gapOpens = countGapOpens(axt->qSym, axt->tSym, axt->symCount);
00753 fprintf(f, "%s\t", axt->qName);
00754 fprintf(f, "%s\t", axt->tName);
00755 fprintf(f, "%.2f\t", 100.0 * matches/axt->symCount);
00756 fprintf(f, "%d\t", axt->symCount);
00757 fprintf(f, "%d\t", axt->symCount - matches - gaps);
00758 fprintf(f, "%d\t", gapOpens);
00759 if (axt->qStrand == '-')
00760 {
00761 int s = axt->qStart, e = axt->qEnd;
00762 reverseIntRange(&s, &e, querySize);
00763 fprintf(f, "%d\t", s+1);
00764 fprintf(f, "%d\t", e);
00765 printAxtTargetBlastTab(f, axt, target->size);
00766 }
00767 else
00768 {
00769 fprintf(f, "%d\t", axt->qStart + 1);
00770 fprintf(f, "%d\t", axt->qEnd);
00771 printAxtTargetBlastTab(f, axt, target->size);
00772 }
00773 fprintf(f, "%3.1e\t", blastzScoreToNcbiExpectation(axt->score));
00774 fprintf(f, "%d.0\n", blastzScoreToNcbiBits(axt->score));
00775 }
00776 }
00777
00778
00779 targetHitsFreeList(&targetList);
00780 }
00781
00782 void axtBlastOut(struct axtBundle *abList,
00783 int queryIx, boolean isProt, FILE *f,
00784 char *databaseName, int databaseSeqCount, double databaseLetterCount,
00785 char *blastType, char *ourId, double minIdentity)
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795 {
00796 if (abList == NULL)
00797 return;
00798 if (sameWord(blastType, "wublast"))
00799 wuBlastOut(abList, queryIx, isProt, f, databaseName,
00800 databaseSeqCount, databaseLetterCount, ourId);
00801 else if (sameWord(blastType, "xml"))
00802 xmlBlastOut(abList, queryIx, isProt, f, databaseName,
00803 databaseSeqCount, databaseLetterCount, ourId);
00804 else if (sameWord(blastType, "blast"))
00805 ncbiBlastOut(abList, queryIx, isProt, f, databaseName,
00806 databaseSeqCount, databaseLetterCount, ourId, minIdentity);
00807 else if (sameWord(blastType, "blast8"))
00808 tabBlastOut(abList, queryIx, isProt, f, databaseName,
00809 databaseSeqCount, databaseLetterCount, ourId, FALSE);
00810 else if (sameWord(blastType, "blast9"))
00811 tabBlastOut(abList, queryIx, isProt, f, databaseName,
00812 databaseSeqCount, databaseLetterCount, ourId, TRUE);
00813 else
00814 errAbort("Unrecognized blastType %s in axtBlastOut", blastType);
00815 }
00816