#include "common.h"#include "dystring.h"#include "linefile.h"#include "dnautil.h"#include "blastParse.h"#include "verbose.h"Include dependency graph for blastParse.c:

Go to the source code of this file.
Defines | |
| #define | TRACE_LEVEL 3 |
| #define | DUMP_LEVEL 4 |
Functions | |
| blastFile * | blastFileReadAll (char *fileName) |
| static void | bfError (struct blastFile *bf, char *message) |
| static void | bfUnexpectedEof (struct blastFile *bf) |
| static void | bfSyntax (struct blastFile *bf) |
| static char * | bfNextLine (struct blastFile *bf) |
| static boolean | bfSkipBlankLines (struct blastFile *bf) |
| static char * | bfNeedNextLine (struct blastFile *bf) |
| static char * | bfSearchForLine (struct blastFile *bf, char *start) |
| static void | bfBadHeader (struct blastFile *bf) |
| blastFile * | blastFileOpenVerify (char *fileName) |
| void | decomma (char *s) |
| static void | parseQueryLines (struct blastFile *bf, char *line, struct blastQuery *bq) |
| static void | parseDatabaseLines (struct blastFile *bf, char *line, struct blastQuery *bq) |
| blastQuery * | blastFileNextQuery (struct blastFile *bf) |
| blastGappedAli * | blastFileNextGapped (struct blastFile *bf, struct blastQuery *bq) |
| static int | getStrand (struct blastFile *bf, char *strand) |
| static boolean | nextBlockLine (struct blastFile *bf, char **retLine) |
| double | evalToDouble (char *s) |
| static void | parseBlockLine (struct blastFile *bf, int *startRet, int *endRet, struct dyString *seq) |
| static struct blastBlock * | nextBlock (struct blastFile *bf, struct blastQuery *bq, struct blastGappedAli *bga, boolean *skipRet) |
| blastBlock * | blastFileNextBlock (struct blastFile *bf, struct blastQuery *bq, struct blastGappedAli *bga) |
| void | blastFileFree (struct blastFile **pBf) |
| void | blastFileFreeList (struct blastFile **pList) |
| void | blastQueryFree (struct blastQuery **pBq) |
| void | blastQueryFreeList (struct blastQuery **pList) |
| void | blastGappedAliFree (struct blastGappedAli **pBga) |
| void | blastGappedAliFreeList (struct blastGappedAli **pList) |
| void | blastBlockFree (struct blastBlock **pBb) |
| void | blastBlockFreeList (struct blastBlock **pList) |
| void | blastBlockPrint (struct blastBlock *bb, FILE *out) |
| void | blastGappedAliPrint (struct blastGappedAli *ba, FILE *out) |
| void | blastQueryPrint (struct blastQuery *bq, FILE *out) |
| #define DUMP_LEVEL 4 |
| #define TRACE_LEVEL 3 |
Definition at line 10 of file blastParse.c.
Referenced by bfNextLine(), blastFileNextGapped(), blastFileNextQuery(), and nextBlock().
| static void bfBadHeader | ( | struct blastFile * | bf | ) | [static] |
Definition at line 101 of file blastParse.c.
References bfError().
Referenced by blastFileOpenVerify().
00103 { 00104 bfError(bf, "Bad first line\nShould be something like:\n" 00105 "BLASTN 2.0.11 [Jan-20-2000]"); 00106 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void bfError | ( | struct blastFile * | bf, | |
| char * | message | |||
| ) | [static] |
Definition at line 29 of file blastParse.c.
References errAbort(), blastFile::fileName, blastFile::lf, and lineFile::lineIx.
Referenced by bfBadHeader(), bfSyntax(), blastFileNextGapped(), getStrand(), nextBlock(), parseDatabaseLines(), and parseQueryLines().
Here is the call graph for this function:

Here is the caller graph for this function:

| static char* bfNeedNextLine | ( | struct blastFile * | bf | ) | [static] |
Definition at line 79 of file blastParse.c.
References bfNextLine(), and bfUnexpectedEof().
Referenced by blastFileNextGapped(), blastFileNextQuery(), blastFileOpenVerify(), nextBlock(), parseBlockLine(), parseDatabaseLines(), and parseQueryLines().
00081 { 00082 char *line = bfNextLine(bf); 00083 if (line == NULL) 00084 bfUnexpectedEof(bf); 00085 return line; 00086 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static char* bfNextLine | ( | struct blastFile * | bf | ) | [static] |
Definition at line 48 of file blastParse.c.
References blastFile::lf, lineFileNext(), TRACE_LEVEL, and verbose().
Referenced by bfNeedNextLine(), bfSearchForLine(), bfSkipBlankLines(), blastFileNextGapped(), and nextBlockLine().
00050 { 00051 char *line = NULL; 00052 if (lineFileNext(bf->lf, &line, NULL)) 00053 { 00054 verbose(TRACE_LEVEL, "=> %s\n", line); 00055 return line; 00056 } 00057 else 00058 { 00059 verbose(TRACE_LEVEL, "=> EOF\n"); 00060 return NULL; 00061 } 00062 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static char* bfSearchForLine | ( | struct blastFile * | bf, | |
| char * | start | |||
| ) | [static] |
Definition at line 88 of file blastParse.c.
References bfNextLine(), and startsWith().
Referenced by blastFileNextQuery().
00090 { 00091 for (;;) 00092 { 00093 char *line = bfNextLine(bf); 00094 if (line == NULL) 00095 return NULL; 00096 if (startsWith(start, line)) 00097 return line; 00098 } 00099 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean bfSkipBlankLines | ( | struct blastFile * | bf | ) | [static] |
Definition at line 64 of file blastParse.c.
References bfNextLine(), FALSE, blastFile::lf, lineFileReuse(), skipLeadingSpaces(), and TRUE.
Referenced by blastFileNextGapped().
00066 { 00067 char *line = NULL; 00068 while ((line = bfNextLine(bf)) != NULL) 00069 { 00070 if (skipLeadingSpaces(line)[0] != '\0') 00071 { 00072 lineFileReuse(bf->lf); 00073 return TRUE; 00074 } 00075 } 00076 return FALSE; /* EOF */ 00077 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void bfSyntax | ( | struct blastFile * | bf | ) | [static] |
Definition at line 42 of file blastParse.c.
References bfError().
Referenced by nextBlock(), and parseBlockLine().
00044 { 00045 bfError(bf, "Can't cope with BLAST output syntax"); 00046 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void bfUnexpectedEof | ( | struct blastFile * | bf | ) | [static] |
Definition at line 36 of file blastParse.c.
References errAbort(), and blastFile::fileName.
Referenced by bfNeedNextLine(), and blastFileNextQuery().
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastBlockFree | ( | struct blastBlock ** | pBb | ) |
Definition at line 694 of file blastParse.c.
References freeMem(), freez(), blastBlock::qSym, and blastBlock::tSym.
Referenced by blastBlockFreeList(), and nextBlock().
00696 { 00697 struct blastBlock *bb = *pBb; 00698 if (bb != NULL) 00699 { 00700 freeMem(bb->qSym); 00701 freeMem(bb->tSym); 00702 freez(pBb); 00703 } 00704 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastBlockFreeList | ( | struct blastBlock ** | pList | ) |
Definition at line 706 of file blastParse.c.
References blastBlockFree(), and blastBlock::next.
Referenced by blastGappedAliFree().
00708 { 00709 struct blastBlock *el, *next; 00710 for (el = *pList; el != NULL; el = next) 00711 { 00712 next = el->next; 00713 blastBlockFree(&el); 00714 } 00715 *pList = NULL; 00716 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastBlockPrint | ( | struct blastBlock * | bb, | |
| FILE * | out | |||
| ) |
Definition at line 718 of file blastParse.c.
References blastBlock::insertCount, blastBlock::matchCount, blastBlock::qEnd, blastBlock::qStart, blastBlock::qSym, blastBlock::tEnd, blastBlock::totalCount, blastBlock::tStart, and blastBlock::tSym.
Referenced by blastGappedAliPrint().
00720 { 00721 fprintf(out, " blk: %d-%d <=> %d-%d tcnt=%d mcnt=%d icnt=%d\n", 00722 bb->qStart, bb->qEnd, bb->tStart, bb->tEnd, 00723 bb->totalCount, bb->matchCount, bb->insertCount); 00724 fprintf(out, " Q: %s\n", bb->qSym); 00725 fprintf(out, " T: %s\n", bb->tSym); 00726 }
Here is the caller graph for this function:

| void blastFileFree | ( | struct blastFile ** | pBf | ) |
Definition at line 615 of file blastParse.c.
References blastQueryFreeList(), blastFile::buildDate, blastFile::fileName, freeMem(), freez(), blastFile::lf, lineFileClose(), blastFile::program, blastFile::queries, and blastFile::version.
Referenced by blastFileFreeList().
00617 { 00618 struct blastFile *bf = *pBf; 00619 if (bf != NULL) 00620 { 00621 lineFileClose(&bf->lf); 00622 freeMem(bf->fileName); 00623 freeMem(bf->program); 00624 freeMem(bf->version); 00625 freeMem(bf->buildDate); 00626 blastQueryFreeList(&bf->queries); 00627 freez(pBf); 00628 } 00629 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastFileFreeList | ( | struct blastFile ** | pList | ) |
Definition at line 631 of file blastParse.c.
References blastFileFree(), and blastFile::next.
00633 { 00634 struct blastFile *el, *next; 00635 for (el = *pList; el != NULL; el = next) 00636 { 00637 next = el->next; 00638 blastFileFree(&el); 00639 } 00640 *pList = NULL; 00641 }
Here is the call graph for this function:

| struct blastBlock* blastFileNextBlock | ( | struct blastFile * | bf, | |
| struct blastQuery * | bq, | |||
| struct blastGappedAli * | bga | |||
| ) | [read] |
Definition at line 601 of file blastParse.c.
References FALSE, and nextBlock().
Referenced by blastFileNextGapped().
00605 { 00606 struct blastBlock *bb = NULL; 00607 boolean skip = FALSE; 00608 00609 while (((bb = nextBlock(bf, bq, bga, &skip)) == NULL) && skip) 00610 continue; /* skip to next one */ 00611 00612 return bb; 00613 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct blastGappedAli* blastFileNextGapped | ( | struct blastFile * | bf, | |
| struct blastQuery * | bq | |||
| ) | [read] |
Definition at line 286 of file blastParse.c.
References AllocVar, bfError(), bfNeedNextLine(), bfNextLine(), bfSkipBlankLines(), blastFileNextBlock(), blastGappedAli::blocks, chopLine, cloneString(), decomma(), blastGappedAli::query, sameString, slAddHead, slReverse(), startsWith(), blastGappedAli::targetName, blastGappedAli::targetSize, TRACE_LEVEL, and verbose().
Referenced by blastFileNextQuery().
00289 { 00290 char *line; 00291 char *words[16]; 00292 int wordCount; 00293 struct blastGappedAli *bga; 00294 struct blastBlock *bb; 00295 int lenSearch; 00296 00297 verbose(TRACE_LEVEL, "blastFileNextGapped\n"); 00298 AllocVar(bga); 00299 bga->query = bq; 00300 00301 /* First line should be query. */ 00302 if (!bfSkipBlankLines(bf)) 00303 return NULL; 00304 line = bfNextLine(bf); 00305 if (startsWith(" Database:", line)) 00306 return NULL; 00307 if (line[0] != '>') 00308 bfError(bf, "Expecting >target"); 00309 bga->targetName = cloneString(line+1); 00310 00311 /* Process something like: 00312 * Length = 100000 00313 * however this follows a possible multi-line description, so be specified 00314 * and limit how far we can scan 00315 */ 00316 for (lenSearch=0; lenSearch<25; lenSearch++) 00317 { 00318 line = bfNeedNextLine(bf); 00319 wordCount = chopLine(line, words); 00320 if (wordCount == 3 && sameString(words[0], "Length") && sameString(words[1], "=") 00321 && isdigit(words[2][0])) 00322 break; 00323 } 00324 if (lenSearch>=25) 00325 bfError(bf, "Expecting Length ="); 00326 decomma(words[2]); 00327 bga->targetSize = atoi(words[2]); 00328 00329 /* Get all the blocks. */ 00330 while ((bb = blastFileNextBlock(bf, bq, bga)) != NULL) 00331 { 00332 slAddHead(&bga->blocks, bb); 00333 } 00334 slReverse(&bga->blocks); 00335 return bga; 00336 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct blastQuery* blastFileNextQuery | ( | struct blastFile * | bf | ) | [read] |
Definition at line 237 of file blastParse.c.
References AllocVar, bfNeedNextLine(), bfSearchForLine(), bfUnexpectedEof(), blastFileNextGapped(), blastQueryPrint(), DUMP_LEVEL, blastQuery::gapped, lineFileReuse(), parseDatabaseLines(), parseQueryLines(), slAddHead, slReverse(), stringIn, TRACE_LEVEL, verbose(), and verboseLevel().
Referenced by blastFileReadAll().
00239 { 00240 char *line; 00241 struct blastQuery *bq; 00242 struct blastGappedAli *bga; 00243 AllocVar(bq); 00244 00245 verbose(TRACE_LEVEL, "blastFileNextQuery\n"); 00246 00247 /* find and parse Query= */ 00248 line = bfSearchForLine(bf, "Query="); 00249 if (line == NULL) 00250 return NULL; 00251 parseQueryLines(bf, line, bq); 00252 00253 /* find and parse Database: */ 00254 line = bfSearchForLine(bf, "Database:"); 00255 if (line == NULL) 00256 bfUnexpectedEof(bf); 00257 parseDatabaseLines(bf, line, bq); 00258 00259 /* Seek to beginning of first gapped alignment. */ 00260 for (;;) 00261 { 00262 line = bfNeedNextLine(bf); 00263 if (line[0] == '>') 00264 { 00265 lineFileReuse(bf->lf); 00266 break; 00267 } 00268 if (stringIn("No hits found", line) != NULL) 00269 break; 00270 } 00271 00272 /* Read in gapped alignments. */ 00273 while ((bga = blastFileNextGapped(bf, bq)) != NULL) 00274 { 00275 slAddHead(&bq->gapped, bga); 00276 } 00277 slReverse(&bq->gapped); 00278 if (verboseLevel() >= DUMP_LEVEL) 00279 { 00280 verbose(DUMP_LEVEL, "blastFileNextQuery result:\n"); 00281 blastQueryPrint(bq, stderr); 00282 } 00283 return bq; 00284 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct blastFile* blastFileOpenVerify | ( | char * | fileName | ) | [read] |
Definition at line 108 of file blastParse.c.
References AllocVar, bfBadHeader(), bfNeedNextLine(), blastFile::buildDate, chopLine, cloneString(), blastFile::fileName, blastFile::lf, lineFileOpen(), blastFile::program, TRUE, blastFile::version, and wildMatch().
Referenced by blastFileReadAll().
00110 { 00111 struct blastFile *bf; 00112 char *line; 00113 char *words[16]; 00114 int wordCount; 00115 struct lineFile *lf; 00116 00117 AllocVar(bf); 00118 bf->lf = lf = lineFileOpen(fileName, TRUE); 00119 bf->fileName = cloneString(fileName); 00120 00121 /* Parse first line - something like: */ 00122 line = bfNeedNextLine(bf); 00123 wordCount = chopLine(line, words); 00124 if (wordCount < 3) 00125 bfBadHeader(bf); 00126 bf->program = cloneString(words[0]); 00127 bf->version = cloneString(words[1]); 00128 bf->buildDate = cloneString(words[2]); 00129 if (!wildMatch("*BLAST*", bf->program)) 00130 bfBadHeader(bf); 00131 if (!isdigit(bf->version[0])) 00132 bfBadHeader(bf); 00133 if (bf->buildDate[0] != '[') 00134 bfBadHeader(bf); 00135 return bf; 00136 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct blastFile* blastFileReadAll | ( | char * | fileName | ) | [read] |
Definition at line 13 of file blastParse.c.
References blastFileNextQuery(), blastFileOpenVerify(), blastFile::lf, lineFileClose(), blastFile::queries, slAddHead, and slReverse().
00015 { 00016 struct blastFile *bf; 00017 struct blastQuery *bq; 00018 00019 bf = blastFileOpenVerify(fileName); 00020 while ((bq = blastFileNextQuery(bf)) != NULL) 00021 { 00022 slAddHead(&bf->queries, bq); 00023 } 00024 slReverse(&bf->queries); 00025 lineFileClose(&bf->lf); 00026 return bf; 00027 }
Here is the call graph for this function:

| void blastGappedAliFree | ( | struct blastGappedAli ** | pBga | ) |
Definition at line 669 of file blastParse.c.
References blastBlockFreeList(), blastGappedAli::blocks, freeMem(), freez(), and blastGappedAli::targetName.
Referenced by blastGappedAliFreeList().
00671 { 00672 struct blastGappedAli *bga = *pBga; 00673 if (bga != NULL) 00674 { 00675 freeMem(bga->targetName); 00676 blastBlockFreeList(&bga->blocks); 00677 freez(pBga); 00678 } 00679 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastGappedAliFreeList | ( | struct blastGappedAli ** | pList | ) |
Definition at line 681 of file blastParse.c.
References blastGappedAliFree(), and blastGappedAli::next.
Referenced by blastQueryFree().
00683 { 00684 struct blastGappedAli *el, *next; 00685 for (el = *pList; el != NULL; el = next) 00686 { 00687 next = el->next; 00688 blastGappedAliFree(&el); 00689 } 00690 *pList = NULL; 00691 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastGappedAliPrint | ( | struct blastGappedAli * | ba, | |
| FILE * | out | |||
| ) |
Definition at line 728 of file blastParse.c.
References blastBlockPrint(), blastGappedAli::blocks, blastBlock::next, blastQuery::query, blastGappedAli::query, and blastGappedAli::targetName.
Referenced by blastQueryPrint().
00730 { 00731 struct blastBlock *bb; 00732 fprintf(out, "%s <=> %s\n", ba->query->query, ba->targetName); 00733 for (bb = ba->blocks; bb != NULL; bb = bb->next) 00734 { 00735 blastBlockPrint(bb, out); 00736 } 00737 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastQueryFree | ( | struct blastQuery ** | pBq | ) |
Definition at line 643 of file blastParse.c.
References blastGappedAliFreeList(), blastQuery::database, freeMem(), freez(), blastQuery::gapped, and blastQuery::query.
Referenced by blastQueryFreeList().
00645 { 00646 struct blastQuery *bq; 00647 if ((bq = *pBq) != NULL) 00648 { 00649 freeMem(bq->query); 00650 freeMem(bq->database); 00651 blastGappedAliFreeList(&bq->gapped); 00652 freez(pBq); 00653 } 00654 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastQueryFreeList | ( | struct blastQuery ** | pList | ) |
Definition at line 656 of file blastParse.c.
References blastQueryFree(), and blastQuery::next.
Referenced by blastFileFree().
00658 { 00659 struct blastQuery *el, *next; 00660 for (el = *pList; el != NULL; el = next) 00661 { 00662 next = el->next; 00663 blastQueryFree(&el); 00664 } 00665 *pList = NULL; 00666 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void blastQueryPrint | ( | struct blastQuery * | bq, | |
| FILE * | out | |||
| ) |
Definition at line 739 of file blastParse.c.
References blastGappedAliPrint(), blastQuery::gapped, and blastGappedAli::next.
Referenced by blastFileNextQuery().
00741 { 00742 struct blastGappedAli *ba; 00743 for (ba = bq->gapped; ba != NULL; ba = ba->next) 00744 blastGappedAliPrint(ba, out); 00745 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void decomma | ( | char * | s | ) |
Definition at line 138 of file blastParse.c.
Referenced by blastFileNextGapped(), parseDatabaseLines(), and parseQueryLines().
00140 { 00141 char *d = s; 00142 char c; 00143 00144 for (;;) 00145 { 00146 c = *s++; 00147 if (c != ',') 00148 *d++ = c; 00149 if (c == 0) 00150 break; 00151 } 00152 }
Here is the caller graph for this function:

| double evalToDouble | ( | char * | s | ) |
Definition at line 370 of file blastParse.c.
References lineFile::buf, and safef().
Referenced by nextBlock().
00375 { 00376 char buf[64]; 00377 if (isdigit(s[0])) 00378 return atof(s); 00379 else 00380 { 00381 safef(buf, sizeof(buf), "1.0%s", s); 00382 return atof(buf); 00383 } 00384 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int getStrand | ( | struct blastFile * | bf, | |
| char * | strand | |||
| ) | [static] |
Definition at line 338 of file blastParse.c.
References bfError(), and sameWord.
Referenced by nextBlock().
00340 { 00341 if (sameWord("Plus", strand)) 00342 return 1; 00343 else if (sameWord("Minus", strand)) 00344 return -1; 00345 else 00346 { 00347 bfError(bf, "Expecting Plus or Minus after Strand"); 00348 return 0; 00349 } 00350 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct blastBlock* nextBlock | ( | struct blastFile * | bf, | |
| struct blastQuery * | bq, | |||
| struct blastGappedAli * | bga, | |||
| boolean * | skipRet | |||
| ) | [static, read] |
Definition at line 430 of file blastParse.c.
References AllocVar, ArraySize, bfError(), bfNeedNextLine(), bfSyntax(), blastBlockFree(), chopByChar(), chopLine, cloneMem(), dyStringClear, evalToDouble(), FALSE, getStrand(), lineFileReuse(), newDyString(), nextBlockLine(), parseBlockLine(), blastQuery::queryBaseCount, reverseIntRange(), sameWord, startsWith(), dyString::string, dyString::stringSize, blastGappedAli::targetSize, TRACE_LEVEL, TRUE, and verbose().
Referenced by blastFileNextBlock().
00435 { 00436 struct blastBlock *bb; 00437 char *line; 00438 char *words[16]; 00439 int wordCount; 00440 char *parts[3]; 00441 int partCount; 00442 static struct dyString *qString = NULL, *tString = NULL; 00443 00444 verbose(TRACE_LEVEL, "blastFileNextBlock\n"); 00445 *skipRet = FALSE; 00446 00447 /* Seek until get something like: 00448 * Score = 8770 bits (4424), Expect = 0.0 00449 * or something that looks like we're done with this gapped 00450 * alignment. */ 00451 for (;;) 00452 { 00453 if (!nextBlockLine(bf, &line)) 00454 return NULL; 00455 if (startsWith(" Score", line)) 00456 break; 00457 } 00458 AllocVar(bb); 00459 bb->gappedAli = bga; 00460 wordCount = chopLine(line, words); 00461 if (wordCount < 8 || !sameWord("Score", words[0]) 00462 || !isdigit(words[2][0]) || !(isdigit(words[7][0]) || words[7][0] == 'e') 00463 || !startsWith("Expect", words[5])) 00464 { 00465 bfError(bf, "Expecting something like:\n" 00466 "Score = 8770 bits (4424), Expect = 0.0"); 00467 } 00468 bb->bitScore = atoi(words[2]); 00469 bb->eVal = evalToDouble(words[7]); 00470 00471 /* Process something like: 00472 * Identities = 8320/9618 (86%), Gaps = 3/9618 (0%) 00473 * or 00474 * Identities = 8320/9618 (86%) 00475 * or 00476 * Identities = 10/19 (52%), Positives = 15/19 (78%), Frame = +2 00477 * (wu-tblastn) 00478 * 00479 * Handle weird cases where the is only a `Score' line, with no `Identities' 00480 * lines by skipping the alignment; they seem line small, junky alignments. 00481 */ 00482 line = bfNeedNextLine(bf); 00483 wordCount = chopLine(line, words); 00484 if (wordCount < 3 || !sameWord("Identities", words[0])) 00485 { 00486 if (wordCount > 1 || sameWord("Score", words[0])) 00487 { 00488 /* ugly hack to skip block with no identities */ 00489 *skipRet = TRUE; 00490 blastBlockFree(&bb); 00491 return NULL; 00492 } 00493 bfError(bf, "Expecting identity count"); 00494 } 00495 partCount = chopByChar(words[2], '/', parts, ArraySize(parts)); 00496 if (partCount != 2 || !isdigit(parts[0][0]) || !isdigit(parts[1][0])) 00497 bfSyntax(bf); 00498 bb->matchCount = atoi(parts[0]); 00499 bb->totalCount = atoi(parts[1]); 00500 if (wordCount >= 7 && sameWord("Gaps", words[4])) 00501 { 00502 if (!isdigit(words[6][0])) 00503 bfSyntax(bf); 00504 bb->insertCount = atoi(words[6]); 00505 } 00506 if ((wordCount >= 11) && sameWord("Frame", words[8])) 00507 { 00508 bb->qStrand = '+'; 00509 bb->tStrand = words[10][0]; 00510 bb->frame = atoi(words[10]); 00511 } 00512 00513 /* Process something like: 00514 * Strand = Plus / Plus (blastn) 00515 * Frame = +1 (tblastn) 00516 * <blank line> (blastp) 00517 * note that wu-tblastn puts frame on Identities line 00518 */ 00519 line = bfNeedNextLine(bf); 00520 wordCount = chopLine(line, words); 00521 if ((wordCount >= 5) && sameWord("Strand", words[0])) 00522 { 00523 bb->qStrand = getStrand(bf, words[2]); 00524 bb->tStrand = getStrand(bf, words[4]); 00525 } 00526 else if ((wordCount >= 3) && sameWord("Frame", words[0])) 00527 { 00528 bb->qStrand = 1; 00529 bb->tStrand = (words[2][0] == '-') ? -1 : 1; 00530 bb->frame = atoi(words[2]); 00531 } 00532 else if (wordCount == 0) 00533 { 00534 /* if we didn't parse frame, default it */ 00535 if (bb->qStrand == 0) 00536 { 00537 bb->qStrand = '+'; 00538 bb->tStrand = '+'; 00539 } 00540 } 00541 else 00542 bfError(bf, "Expecting Strand, Frame or blank line"); 00543 00544 00545 /* Process alignment lines. They come in groups of three 00546 * separated by a blank line - something like: 00547 * Query: 26429 taccttgacattcctcagtgtgtcatcatcgttctctcctccaaacggcgagagtccgga 26488 00548 * |||||| |||||||||| ||| ||||||||||||||||||||||| || || |||||||| 00549 * Sbjct: 62966 taccttaacattcctcaatgtttcatcatcgttctctcctccaaatggtgaaagtccgga 63025 00550 */ 00551 bb->qStart = bb->tStart = 0x3fffffff; 00552 bb->qEnd = bb->tEnd = -bb->qStart; 00553 if (qString == NULL) 00554 { 00555 qString = newDyString(50000); 00556 tString = newDyString(50000); 00557 } 00558 else 00559 { 00560 dyStringClear(qString); 00561 dyStringClear(tString); 00562 } 00563 for (;;) 00564 { 00565 /* Fetch next first line. */ 00566 for (;;) 00567 { 00568 if (!nextBlockLine(bf, &line)) 00569 goto DONE; 00570 if (startsWith(" Score", line)) 00571 { 00572 lineFileReuse(bf->lf); 00573 goto DONE; 00574 } 00575 if (startsWith("Query:", line)) 00576 break; 00577 } 00578 lineFileReuse(bf->lf); 00579 parseBlockLine(bf, &bb->qStart, &bb->qEnd, qString); 00580 00581 /* Skip next line. */ 00582 line = bfNeedNextLine(bf); 00583 00584 /* Fetch target sequence line. */ 00585 parseBlockLine(bf, &bb->tStart, &bb->tEnd, tString); 00586 } 00587 DONE: 00588 00589 /* convert to [0..n) and move to strand coords if necessary */ 00590 bb->qStart--; 00591 if (bb->qStrand < 0) 00592 reverseIntRange(&bb->qStart, &bb->qEnd, bq->queryBaseCount); 00593 bb->tStart--; 00594 if (bb->tStrand < 0) 00595 reverseIntRange(&bb->tStart, &bb->tEnd, bga->targetSize); 00596 bb->qSym = cloneMem(qString->string, qString->stringSize+1); 00597 bb->tSym = cloneMem(tString->string, tString->stringSize+1); 00598 return bb; 00599 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean nextBlockLine | ( | struct blastFile * | bf, | |
| char ** | retLine | |||
| ) | [static] |
Definition at line 352 of file blastParse.c.
References bfNextLine(), FALSE, blastFile::lf, lineFileReuse(), startsWith(), and TRUE.
Referenced by nextBlock().
00355 { 00356 struct lineFile *lf = bf->lf; 00357 char *line; 00358 00359 *retLine = line = bfNextLine(bf); 00360 if (line == NULL) 00361 return FALSE; 00362 if (line[0] == '>' || startsWith("Query=", line) || startsWith(" Database:", line)) 00363 { 00364 lineFileReuse(lf); 00365 return FALSE; 00366 } 00367 return TRUE; 00368 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void parseBlockLine | ( | struct blastFile * | bf, | |
| int * | startRet, | |||
| int * | endRet, | |||
| struct dyString * | seq | |||
| ) | [static] |
Definition at line 386 of file blastParse.c.
References bfNeedNextLine(), bfSyntax(), chopLine, dyStringAppend(), max, min, and sameString.
Referenced by nextBlock().
00388 : 00389 * Query: 26429 taccttgacattcctcagtgtgtcatcatcgttctctcctccaaacggcgagagtccgga 26488 00390 * 00391 * also handle broken NCBI tblastn output like: 00392 * Sbjct: 1181YYGEQRSTNGQTIQLKTQVFRRFPDDDDESEDHDDPDNAHESPEQEGAEGHFDLHYYENQ 1360 00393 */ 00394 { 00395 char* line = bfNeedNextLine(bf); 00396 int a, b, s, e; 00397 char *words[16]; 00398 int wordCount = chopLine(line, words); 00399 if ((wordCount < 3) || (wordCount > 4) 00400 || !(sameString("Query:", words[0]) || sameString("Sbjct:", words[0]))) 00401 bfSyntax(bf); 00402 00403 /* special handling for broken output */ 00404 if (wordCount == 3) 00405 { 00406 char *p; 00407 if (!isdigit(words[1][0]) || !isdigit(words[2][0])) 00408 bfSyntax(bf); 00409 a = atoi(words[1]); 00410 b = atoi(words[2]); 00411 p = words[1]; 00412 while ((*p != '\0') && (isdigit(*p))) 00413 p++; 00414 dyStringAppend(seq, p); 00415 } 00416 else 00417 { 00418 if (!isdigit(words[1][0]) || !isdigit(words[3][0])) 00419 bfSyntax(bf); 00420 a = atoi(words[1]); 00421 b = atoi(words[3]); 00422 dyStringAppend(seq, words[2]); 00423 } 00424 s = min(a,b); 00425 e = max(a,b); 00426 *startRet = min(s, *startRet); 00427 *endRet = max(e, *endRet); 00428 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void parseDatabaseLines | ( | struct blastFile * | bf, | |
| char * | line, | |||
| struct blastQuery * | bq | |||
| ) | [static] |
Definition at line 194 of file blastParse.c.
References bfError(), bfNeedNextLine(), chopLine, cloneString(), blastQuery::database, blastQuery::dbBaseCount, blastQuery::dbSeqCount, decomma(), dyStringAppend(), dyStringClear, dyStringNew, and dyString::string.
Referenced by blastFileNextQuery().
00195 : 00196 * Database: chr22.fa 00197 * 977 sequences; 95,550,797 total letters 00198 */ 00199 { 00200 static struct dyString *tmpBuf = NULL; 00201 char *words[16]; 00202 int wordCount; 00203 if (bq->database != NULL) 00204 bfError(bf, "already parse Database:"); 00205 00206 if (tmpBuf == NULL) 00207 tmpBuf = dyStringNew(512); 00208 00209 /* parse something like 00210 * Database: celegans98 00211 * some versions of blastp include the absolute path, but 00212 * then split it across lines. 00213 */ 00214 wordCount = chopLine(line, words); 00215 if (wordCount < 2) 00216 bfError(bf, "Expecting database name"); 00217 dyStringClear(tmpBuf); 00218 dyStringAppend(tmpBuf, words[1]); 00219 while (line = bfNeedNextLine(bf), !isspace(line[0])) 00220 { 00221 dyStringAppend(tmpBuf, line); 00222 } 00223 bq->database = cloneString(tmpBuf->string); 00224 00225 /* Process something like: 00226 * 977 sequences; 95,550,797 total letters 00227 */ 00228 wordCount = chopLine(line, words); 00229 if (wordCount < 3 || !isdigit(words[0][0]) || !isdigit(words[2][0])) 00230 bfError(bf, "Expecting database info"); 00231 decomma(words[0]); 00232 decomma(words[2]); 00233 bq->dbSeqCount = atoi(words[0]); 00234 bq->dbBaseCount = atoi(words[2]); 00235 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void parseQueryLines | ( | struct blastFile * | bf, | |
| char * | line, | |||
| struct blastQuery * | bq | |||
| ) | [static] |
Definition at line 154 of file blastParse.c.
References bfError(), bfNeedNextLine(), chopLine, cloneString(), decomma(), blastQuery::query, blastQuery::queryBaseCount, and skipLeadingSpaces().
Referenced by blastFileNextQuery().
00156 { 00157 char *s, *e; 00158 char *words[16]; 00159 int wordCount; 00160 if (bq->query != NULL) 00161 bfError(bf, "already parse Query="); 00162 00163 /* Process something like: 00164 * Query= MM39H11 00630 00165 */ 00166 wordCount = chopLine(line, words); 00167 if (wordCount < 2) 00168 bfError(bf, "No sequence name in query line"); 00169 bq->query = cloneString(words[1]); 00170 00171 for (;;) 00172 { 00173 line = bfNeedNextLine(bf); 00174 s = skipLeadingSpaces(line); 00175 if (s[0] == '(') 00176 break; 00177 } 00178 if (!isdigit(s[1])) 00179 { 00180 bfError(bf, "expecting something like:\n" 00181 " (45,693 letters)"); 00182 } 00183 s += 1; 00184 if ((e = strchr(s, ' ')) == NULL) 00185 { 00186 bfError(bf, "expecting something like:\n" 00187 " (45,693 letters)"); 00188 } 00189 *e = 0; 00190 decomma(s); 00191 bq->queryBaseCount = atoi(s); 00192 }
Here is the call graph for this function:

Here is the caller graph for this function:

1.5.2