#include "common.h"#include "sqlNum.h"#include "sqlList.h"#include "localmem.h"#include "psl.h"#include "hash.h"#include "linefile.h"#include "dnaseq.h"#include "dystring.h"#include "fuzzyFind.h"#include "aliType.h"#include "binRange.h"#include "rangeTree.h"Include dependency graph for psl.c:

Go to the source code of this file.
Defines | |
| #define | swapVars(a, b, tmp) ((tmp) = (a), (a) = (b), (b) = (tmp)) |
Functions | |
| psl * | pslxLoad (char **row) |
| psl * | pslLoad (char **row) |
| psl * | pslCommaIn (char **pS, struct psl *ret) |
| void | pslFree (struct psl **pEl) |
| void | pslFreeList (struct psl **pList) |
| void | pslOutput (struct psl *el, FILE *f, char sep, char lastSep) |
| void | pslOutputShort (struct psl *el, FILE *f, char sep, char lastSep) |
| void | pslOutFormat (struct psl *el, FILE *f, char sep, char lastSep) |
| psl * | pslLoadAll (char *fileName) |
| int | pslCmpQuery (const void *va, const void *vb) |
| int | pslCmpTarget (const void *va, const void *vb) |
| int | pslCmpTargetAndStrand (const void *va, const void *vb) |
| int | pslCmpScore (const void *va, const void *vb) |
| int | pslCmpQueryScore (const void *va, const void *vb) |
| int | pslCmpMatch (const void *va, const void *vb) |
| static void | pslLabelColumns (FILE *f) |
| void | pslxWriteHead (FILE *f, enum gfType qType, enum gfType tType) |
| void | pslWriteHead (FILE *f) |
| void | pslWriteAll (struct psl *pslList, char *fileName, boolean writeHeader) |
| void | pslxFileOpen (char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf) |
| static void | pslxFileOpenWithMetaConfig (char *fileName, bool isMetaUnique, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f) |
| void | pslxFileOpenWithMeta (char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f) |
| void | pslxFileOpenWithUniqueMeta (char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f) |
| lineFile * | pslFileOpen (char *fileName) |
| lineFile * | pslFileOpenWithMeta (char *fileName, FILE *f) |
| lineFile * | pslFileOpenWithUniqueMeta (char *fileName, FILE *f) |
| psl * | pslNext (struct lineFile *lf) |
| psl * | pslxLoadLm (char **row, struct lm *lm) |
| psl * | pslLoadLm (char **row, struct lm *lm) |
| boolean | pslIsProtein (const struct psl *psl) |
| int | pslCalcMilliBad (struct psl *psl, boolean isMrna) |
| int | pslScore (const struct psl *psl) |
| int | pslCmpScoreDesc (const void *va, const void *vb) |
| ffAli * | pslToFakeFfAli (struct psl *psl, DNA *needle, DNA *haystack) |
| psl * | pslFromFakeFfAli (struct ffAli *ff, DNA *needle, DNA *haystack, char strand, char *qName, int qSize, char *tName, int tSize) |
| ffAli * | pslToFfAli (struct psl *psl, struct dnaSeq *query, struct dnaSeq *target, int targetOffset) |
| int | pslOrientation (struct psl *psl) |
| int | pslWeightedIntronOrientation (struct psl *psl, struct dnaSeq *genoSeq, int offset) |
| int | pslIntronOrientation (struct psl *psl, struct dnaSeq *genoSeq, int offset) |
| boolean | pslHasIntron (struct psl *psl, struct dnaSeq *seq, int seqOffset) |
| void | pslTailSizes (struct psl *psl, int *retStartTail, int *retEndTail) |
| static void | rcSeqs (char **seqs, unsigned blockCount, unsigned *blockSizes) |
| void | pslRcBoth (struct psl *psl) |
| void | pslRc (struct psl *psl) |
| static void | swapBlocks (struct psl *psl) |
| static void | swapRCBlocks (struct psl *psl) |
| void | pslSwap (struct psl *psl, boolean noRc) |
| void | pslTargetOffset (struct psl *psl, int offset) |
| void | pslDump (struct psl *psl, FILE *f) |
| static void | pslRecalcBounds (struct psl *psl) |
| psl * | pslTrimToTargetRange (struct psl *oldPsl, int tMin, int tMax) |
| psl * | pslTrimToQueryRange (struct psl *oldPsl, int qMin, int qMax) |
| char * | pslGetCreateSql (char *table, unsigned options, int tNameIdxLen) |
| static void | printPslDesc (char *pslDesc, FILE *out, struct psl *psl) |
| static void | chkRanges (char *pslDesc, FILE *out, struct psl *psl, char *pName, char *pLabel, char pCLabel, char pStrand, unsigned pSize, unsigned pStart, unsigned pEnd, unsigned blockCount, unsigned *blockSizes, unsigned *pBlockStarts, int *errCountPtr) |
| int | pslCheck (char *pslDesc, FILE *out, struct psl *psl) |
| hash * | readPslToBinKeeper (char *sizeFileName, char *pslFileName) |
| static int | countInitialChars (char *s, char c) |
| static int | countTerminalChars (char *s, char c) |
| static int | countNonInsert (char *s, int size) |
| static void | trimAlignment (struct psl *psl, char **qStringPtr, char **tStringPtr, int *aliSizePtr) |
| static void | addBlock (struct psl *psl, int qs, int qe, int ts, int te, int *blockSpace) |
| static void | accumCounts (struct psl *psl, char prevQ, char prevT, char q, char t, unsigned options) |
| psl * | pslFromAlign (char *qName, int qSize, int qStart, int qEnd, char *qString, char *tName, int tSize, int tStart, int tEnd, char *tString, char *strand, unsigned options) |
| psl * | pslNew (char *qName, unsigned qSize, int qStart, int qEnd, char *tName, unsigned tSize, int tStart, int tEnd, char *strand, unsigned blockSpace, unsigned opts) |
| void | pslGrow (struct psl *psl, int *blockSpacePtr) |
| int | pslRangeTreeOverlap (struct psl *psl, struct rbTree *rangeTree) |
Variables | |
| static char const | rcsid [] = "$Id: psl.c,v 1.76 2007/03/04 20:24:19 kent Exp $" |
| static char * | createString |
| static char * | indexString |
| #define swapVars | ( | a, | |||
| b, | |||||
| tmp | ) | ((tmp) = (a), (a) = (b), (b) = (tmp)) |
| static void accumCounts | ( | struct psl * | psl, | |
| char | prevQ, | |||
| char | prevT, | |||
| char | q, | |||
| char | t, | |||
| unsigned | options | |||
| ) | [static] |
Definition at line 1797 of file psl.c.
References psl::match, psl::misMatch, psl::nCount, PSL_IS_SOFTMASK, psl::qBaseInsert, psl::qNumInsert, psl::repMatch, psl::tBaseInsert, and psl::tNumInsert.
Referenced by pslFromAlign().
01800 { 01801 if ((q != '-') && (t != '-')) 01802 { 01803 /* aligned column. */ 01804 char qu = toupper(q); 01805 char tu = toupper(t); 01806 if ((q == 'N') || (t == 'N')) 01807 psl->nCount++; 01808 else if (qu == tu) 01809 { 01810 if ((options & PSL_IS_SOFTMASK) && ((qu != q) || (tu != t))) 01811 psl->repMatch++; 01812 else 01813 psl->match++; 01814 } 01815 else 01816 psl->misMatch++; 01817 } 01818 else if ((q == '-') && (t != '-')) 01819 { 01820 /* target insert */ 01821 psl->tBaseInsert++; 01822 if (prevQ != '-') 01823 psl->tNumInsert++; 01824 } 01825 else if ((t == '-') && (q != '-')) 01826 { 01827 /* query insert */ 01828 psl->qBaseInsert++; 01829 if (prevT != '-') 01830 psl->qNumInsert++; 01831 } 01832 }
Here is the caller graph for this function:

| static void addBlock | ( | struct psl * | psl, | |
| int | qs, | |||
| int | qe, | |||
| int | ts, | |||
| int | te, | |||
| int * | blockSpace | |||
| ) | [static] |
Definition at line 1784 of file psl.c.
References psl::blockCount, and pslGrow().
Referenced by pslFromAlign().
01787 { 01788 assert((qe-qs) == (te-ts)); /* same lengths? */ 01789 if (psl->blockCount == *blockSpace) 01790 pslGrow(psl, blockSpace); 01791 psl->blockSizes[psl->blockCount] = qe - qs; 01792 psl->qStarts[psl->blockCount] = qs; 01793 psl->tStarts[psl->blockCount] = ts; 01794 psl->blockCount++; 01795 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void chkRanges | ( | char * | pslDesc, | |
| FILE * | out, | |||
| struct psl * | psl, | |||
| char * | pName, | |||
| char * | pLabel, | |||
| char | pCLabel, | |||
| char | pStrand, | |||
| unsigned | pSize, | |||
| unsigned | pStart, | |||
| unsigned | pEnd, | |||
| unsigned | blockCount, | |||
| unsigned * | blockSizes, | |||
| unsigned * | pBlockStarts, | |||
| int * | errCountPtr | |||
| ) | [static] |
Definition at line 1522 of file psl.c.
References printPslDesc().
Referenced by pslCheck().
01528 { 01529 int errCount = *errCountPtr; 01530 unsigned iBlk, prevBlkEnd = 0; 01531 01532 if (pStart >= pEnd) 01533 { 01534 if (errCount == 0) 01535 printPslDesc(pslDesc, out, psl); 01536 fprintf(out, "\t%s %cStart %u >= %cEnd %u\n", 01537 pName, pCLabel, pStart, pCLabel, pEnd); 01538 errCount++; 01539 } 01540 if (pEnd > pSize) 01541 { 01542 if (errCount == 0) 01543 printPslDesc(pslDesc, out, psl); 01544 fprintf(out, "\t%s %cEnd %u >= %cSize %u\n", 01545 pName, pCLabel, pEnd, pCLabel, pSize); 01546 errCount++; 01547 } 01548 for (iBlk = 0; iBlk < blockCount; iBlk++) 01549 { 01550 unsigned blkStart = pBlockStarts[iBlk]; 01551 unsigned blkEnd = blkStart+blockSizes[iBlk]; 01552 /* translate stand to genomic coords */ 01553 unsigned gBlkStart = (pStrand == '+') ? blkStart : (pSize - blkEnd); 01554 unsigned gBlkEnd = (pStrand == '+') ? blkEnd : (pSize - blkStart); 01555 01556 if ((pSize > 0) && (blkEnd > pSize)) 01557 { 01558 if (errCount == 0) 01559 printPslDesc(pslDesc, out, psl); 01560 fprintf(out, "\t%s %s block %u end %u > %cSize %u\n", 01561 pName, pLabel, iBlk, blkEnd, pCLabel, pSize); 01562 errCount++; 01563 } 01564 if (gBlkStart < pStart) 01565 { 01566 if (errCount == 0) 01567 printPslDesc(pslDesc, out, psl); 01568 fprintf(out, "\t%s %s block %u start %u < %cStart %u\n", 01569 pName, pLabel, iBlk, gBlkStart, pCLabel, pStart); 01570 errCount++; 01571 } 01572 if (gBlkStart >= pEnd) 01573 { 01574 if (errCount == 0) 01575 printPslDesc(pslDesc, out, psl); 01576 fprintf(out, "\t%s %s block %u start %u >= %cEnd %u\n", 01577 pName, pLabel, iBlk, gBlkStart, pCLabel, pEnd); 01578 errCount++; 01579 } 01580 if (gBlkEnd < pStart) 01581 { 01582 if (errCount == 0) 01583 printPslDesc(pslDesc, out, psl); 01584 fprintf(out, "\t%s %s block %u end %u < %cStart %u\n", 01585 pName, pLabel, iBlk, gBlkEnd, pCLabel, pStart); 01586 errCount++; 01587 } 01588 if (gBlkEnd > pEnd) 01589 { 01590 if (errCount == 0) 01591 printPslDesc(pslDesc, out, psl); 01592 fprintf(out, "\t%s %s block %u end %u > %cEnd %u\n", 01593 pName, pLabel, iBlk, gBlkEnd, pCLabel, pEnd); 01594 errCount++; 01595 } 01596 if ((iBlk > 0) && (blkStart < prevBlkEnd)) 01597 { 01598 if (errCount == 0) 01599 printPslDesc(pslDesc, out, psl); 01600 fprintf(out, "\t%s %s block %u start %u < previous block end %u\n", 01601 pName, pLabel, iBlk, blkStart, prevBlkEnd); 01602 errCount++; 01603 } 01604 prevBlkEnd = blkEnd; 01605 } 01606 *errCountPtr = errCount; 01607 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int countInitialChars | ( | char * | s, | |
| char | c | |||
| ) | [static] |
Definition at line 1699 of file psl.c.
Referenced by trimAlignment().
01701 { 01702 int count = 0; 01703 char d; 01704 while ((d = *s++) != 0) 01705 { 01706 if (c == d) 01707 ++count; 01708 else 01709 break; 01710 } 01711 return count; 01712 }
Here is the caller graph for this function:

| static int countNonInsert | ( | char * | s, | |
| int | size | |||
| ) | [static] |
Definition at line 1729 of file psl.c.
Referenced by trimAlignment().
01732 { 01733 int count = 0; 01734 int i; 01735 for (i=0; i<size; ++i) 01736 if (*s++ != '-') 01737 ++count; 01738 return count; 01739 }
Here is the caller graph for this function:

| static int countTerminalChars | ( | char * | s, | |
| char | c | |||
| ) | [static] |
Definition at line 1714 of file psl.c.
Referenced by trimAlignment().
01716 { 01717 int len = strlen(s), i; 01718 int count = 0; 01719 for (i=len-1; i>=0; --i) 01720 { 01721 if (c == s[i]) 01722 ++count; 01723 else 01724 break; 01725 } 01726 return count; 01727 }
Here is the caller graph for this function:

| static void printPslDesc | ( | char * | pslDesc, | |
| FILE * | out, | |||
| struct psl * | psl | |||
| ) | [static] |
Definition at line 1513 of file psl.c.
References psl::qEnd, psl::qName, psl::qStart, psl::strand, psl::tEnd, psl::tName, and psl::tStart.
Referenced by chkRanges(), and pslCheck().
01515 { 01516 fprintf(out, "Error: invalid PSL: %s:%u-%u %s:%u-%u %s %s\n", 01517 psl->qName, psl->qStart, psl->qEnd, 01518 psl->tName, psl->tStart, psl->tEnd, 01519 psl->strand, pslDesc); 01520 }
Here is the caller graph for this function:

| int pslCalcMilliBad | ( | struct psl * | psl, | |
| boolean | isMrna | |||
| ) |
Definition at line 801 of file psl.c.
References psl::match, min, psl::misMatch, pslIsProtein(), psl::qEnd, psl::qNumInsert, psl::qStart, psl::repMatch, round, psl::tEnd, psl::tNumInsert, and psl::tStart.
Referenced by aliLines().
00803 { 00804 int sizeMul = pslIsProtein(psl) ? 3 : 1; 00805 int qAliSize, tAliSize, aliSize; 00806 int milliBad = 0; 00807 int sizeDif; 00808 int insertFactor; 00809 int total; 00810 00811 qAliSize = sizeMul * (psl->qEnd - psl->qStart); 00812 tAliSize = psl->tEnd - psl->tStart; 00813 aliSize = min(qAliSize, tAliSize); 00814 if (aliSize <= 0) 00815 return 0; 00816 sizeDif = qAliSize - tAliSize; 00817 if (sizeDif < 0) 00818 { 00819 if (isMrna) 00820 sizeDif = 0; 00821 else 00822 sizeDif = -sizeDif; 00823 } 00824 insertFactor = psl->qNumInsert; 00825 if (!isMrna) 00826 insertFactor += psl->tNumInsert; 00827 00828 total = (sizeMul * (psl->match + psl->repMatch + psl->misMatch)); 00829 if (total != 0) 00830 milliBad = (1000 * (psl->misMatch*sizeMul + insertFactor + round(3*log(1+sizeDif)))) / total; 00831 return milliBad; 00832 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int pslCheck | ( | char * | pslDesc, | |
| FILE * | out, | |||
| struct psl * | psl | |||
| ) |
Definition at line 1609 of file psl.c.
References psl::blockCount, psl::blockSizes, chkRanges(), FALSE, printPslDesc(), pslIsProtein(), psl::qEnd, psl::qName, psl::qSize, psl::qStart, psl::qStarts, psl::strand, psl::tEnd, psl::tName, TRUE, psl::tSize, psl::tStart, and psl::tStarts.
01612 { 01613 static char* VALID_STRANDS[] = { 01614 "+", "-", "++", "+-", "-+", "--", NULL 01615 }; 01616 int i, errCount = 0; 01617 char strand; 01618 boolean isProt = FALSE; 01619 01620 /* check strand value */ 01621 for (i = 0; VALID_STRANDS[i] != NULL; i++) 01622 { 01623 if (strcmp(psl->strand, VALID_STRANDS[i]) == 0) 01624 break; 01625 } 01626 if (VALID_STRANDS[i] == NULL) 01627 { 01628 if (errCount == 0) 01629 printPslDesc(pslDesc, out, psl); 01630 fprintf(out, "\tinvalid PSL strand: \"%s\"\n", psl->strand); 01631 errCount++; 01632 } 01633 01634 /* check target */ 01635 if (pslIsProtein(psl)) 01636 { 01637 isProt = TRUE; 01638 for (i = 0; i < psl->blockCount ; i++) 01639 psl->blockSizes[i] *= 3; 01640 } 01641 01642 strand = ((psl->strand[1] == '\0') ? '+' : psl->strand[1]); 01643 chkRanges(pslDesc, out, psl, psl->tName, "target", 't', 01644 strand, psl->tSize, psl->tStart, psl->tEnd, 01645 psl->blockCount, psl->blockSizes, psl->tStarts, 01646 &errCount); 01647 if (isProt) 01648 { 01649 for (i = 0; i < psl->blockCount ; i++) 01650 psl->blockSizes[i] /= 3; 01651 } 01652 01653 /* check query */ 01654 strand = psl->strand[0]; 01655 chkRanges(pslDesc, out, psl, psl->qName, "query", 'q', 01656 strand, psl->qSize, psl->qStart, psl->qEnd, 01657 psl->blockCount, psl->blockSizes, psl->qStarts, 01658 &errCount); 01659 01660 return errCount; 01661 }
Here is the call graph for this function:

| int pslCmpMatch | ( | const void * | va, | |
| const void * | vb | |||
| ) |
| int pslCmpQuery | ( | const void * | va, | |
| const void * | vb | |||
| ) |
Definition at line 426 of file psl.c.
References psl::qName, and psl::qStart.
Referenced by aliLines().
00428 { 00429 const struct psl *a = *((struct psl **)va); 00430 const struct psl *b = *((struct psl **)vb); 00431 int dif; 00432 dif = strcmp(a->qName, b->qName); 00433 if (dif == 0) 00434 dif = a->qStart - b->qStart; 00435 return dif; 00436 }
Here is the caller graph for this function:

| int pslCmpQueryScore | ( | const void * | va, | |
| const void * | vb | |||
| ) |
Definition at line 473 of file psl.c.
References pslScore(), and psl::qName.
Referenced by aliLines().
00475 { 00476 const struct psl *a = *((struct psl **)va); 00477 const struct psl *b = *((struct psl **)vb); 00478 int diff = strcmp(a->qName, b->qName); 00479 if (diff == 0) 00480 diff = pslScore(b) - pslScore(a); 00481 return diff; 00482 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int pslCmpScore | ( | const void * | va, | |
| const void * | vb | |||
| ) |
Definition at line 465 of file psl.c.
References pslScore().
Referenced by aliLines().
00467 { 00468 const struct psl *a = *((struct psl **)va); 00469 const struct psl *b = *((struct psl **)vb); 00470 return pslScore(b) - pslScore(a); 00471 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int pslCmpScoreDesc | ( | const void * | va, | |
| const void * | vb | |||
| ) |
| int pslCmpTarget | ( | const void * | va, | |
| const void * | vb | |||
| ) |
Definition at line 438 of file psl.c.
References psl::tName, and psl::tStart.
00440 { 00441 const struct psl *a = *((struct psl **)va); 00442 const struct psl *b = *((struct psl **)vb); 00443 int dif; 00444 dif = strcmp(a->tName, b->tName); 00445 if (dif == 0) 00446 dif = a->tStart - b->tStart; 00447 return dif; 00448 }
| int pslCmpTargetAndStrand | ( | const void * | va, | |
| const void * | vb | |||
| ) |
Definition at line 450 of file psl.c.
References psl::strand, psl::tName, and psl::tStart.
00452 { 00453 const struct psl *a = *((struct psl **)va); 00454 const struct psl *b = *((struct psl **)vb); 00455 int dif; 00456 dif = strcmp(a->tName, b->tName); 00457 if (dif == 0) 00458 dif = strcmp(a->strand, b->strand); 00459 if (dif == 0) 00460 dif = a->tStart - b->tStart; 00461 return dif; 00462 }
Definition at line 114 of file psl.c.
References AllocArray, AllocVar, sqlEatChar(), sqlFixedStringComma(), sqlSignedComma(), sqlStringComma(), and sqlUnsignedComma().
00118 { 00119 char *s = *pS; 00120 int i; 00121 00122 if (ret == NULL) 00123 AllocVar(ret); 00124 ret->match = sqlUnsignedComma(&s); 00125 ret->misMatch = sqlUnsignedComma(&s); 00126 ret->repMatch = sqlUnsignedComma(&s); 00127 ret->nCount = sqlUnsignedComma(&s); 00128 ret->qNumInsert = sqlUnsignedComma(&s); 00129 ret->qBaseInsert = sqlSignedComma(&s); 00130 ret->tNumInsert = sqlUnsignedComma(&s); 00131 ret->tBaseInsert = sqlSignedComma(&s); 00132 sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand)); 00133 ret->qName = sqlStringComma(&s); 00134 ret->qSize = sqlUnsignedComma(&s); 00135 ret->qStart = sqlUnsignedComma(&s); 00136 ret->qEnd = sqlUnsignedComma(&s); 00137 ret->tName = sqlStringComma(&s); 00138 ret->tSize = sqlUnsignedComma(&s); 00139 ret->tStart = sqlUnsignedComma(&s); 00140 ret->tEnd = sqlUnsignedComma(&s); 00141 ret->blockCount = sqlUnsignedComma(&s); 00142 s = sqlEatChar(s, '{'); 00143 AllocArray(ret->blockSizes, ret->blockCount); 00144 for (i=0; i<ret->blockCount; ++i) 00145 { 00146 ret->blockSizes[i] = sqlUnsignedComma(&s); 00147 } 00148 s = sqlEatChar(s, '}'); 00149 s = sqlEatChar(s, ','); 00150 s = sqlEatChar(s, '{'); 00151 AllocArray(ret->qStarts, ret->blockCount); 00152 for (i=0; i<ret->blockCount; ++i) 00153 { 00154 ret->qStarts[i] = sqlUnsignedComma(&s); 00155 } 00156 s = sqlEatChar(s, '}'); 00157 s = sqlEatChar(s, ','); 00158 s = sqlEatChar(s, '{'); 00159 AllocArray(ret->tStarts, ret->blockCount); 00160 for (i=0; i<ret->blockCount; ++i) 00161 { 00162 ret->tStarts[i] = sqlUnsignedComma(&s); 00163 } 00164 s = sqlEatChar(s, '}'); 00165 s = sqlEatChar(s, ','); 00166 *pS = s; 00167 return ret; 00168 }
Here is the call graph for this function:

| void pslDump | ( | struct psl * | psl, | |
| FILE * | f | |||
| ) |
Definition at line 1267 of file psl.c.
References psl::blockCount, psl::blockSizes, psl::qEnd, psl::qName, psl::qStart, psl::qStarts, psl::strand, psl::tEnd, psl::tName, psl::tStart, and psl::tStarts.
01269 { 01270 int i; 01271 fprintf(f, "<PRE>\n"); 01272 fprintf(f, "psl %s:%d-%d %s %s:%d-%d %d\n", 01273 psl->qName, psl->qStart, psl->qEnd, psl->strand, 01274 psl->tName, psl->tStart, psl->tEnd, psl->blockCount); 01275 for (i=0; i<psl->blockCount; ++i) 01276 fprintf(f, " size %d, qStart %d, tStart %d\n", 01277 psl->blockSizes[i], psl->qStarts[i], psl->tStarts[i]); 01278 fprintf(f, "</PRE>"); 01279 }
| struct lineFile* pslFileOpen | ( | char * | fileName | ) | [read] |
Definition at line 677 of file psl.c.
References pslxFileOpen().
Referenced by aliLines(), doDetailLine(), and pslLoadAll().
00680 { 00681 enum gfType qt, tt; 00682 struct lineFile *lf; 00683 pslxFileOpen(fileName, &qt, &tt, &lf); 00684 return lf; 00685 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct lineFile* pslFileOpenWithMeta | ( | char * | fileName, | |
| FILE * | f | |||
| ) | [read] |
Definition at line 687 of file psl.c.
References pslxFileOpenWithMeta().
00690 { 00691 enum gfType qt, tt; 00692 struct lineFile *lf; 00693 pslxFileOpenWithMeta(fileName, &qt, &tt, &lf, f); 00694 return lf; 00695 }
Here is the call graph for this function:

| struct lineFile* pslFileOpenWithUniqueMeta | ( | char * | fileName, | |
| FILE * | f | |||
| ) | [read] |
Definition at line 697 of file psl.c.
References pslxFileOpenWithUniqueMeta().
00701 { 00702 enum gfType qt, tt; 00703 struct lineFile *lf; 00704 pslxFileOpenWithUniqueMeta(fileName, &qt, &tt, &lf, f); 00705 return lf; 00706 }
Here is the call graph for this function:

| void pslFree | ( | struct psl ** | pEl | ) |
Definition at line 170 of file psl.c.
References psl::blockSizes, freeMem(), freez(), psl::qName, psl::qSequence, psl::qStarts, psl::tName, psl::tSequence, and psl::tStarts.
Referenced by pslFreeList(), pslFromAlign(), and pslTransMap().
00173 { 00174 struct psl *el; 00175 00176 if ((el = *pEl) == NULL) return; 00177 freeMem(el->qName); 00178 freeMem(el->tName); 00179 freeMem(el->blockSizes); 00180 freeMem(el->qStarts); 00181 freeMem(el->tStarts); 00182 if (el->qSequence) 00183 { 00184 freeMem(el->qSequence[0]); 00185 freeMem(el->qSequence); 00186 } 00187 if (el->tSequence) 00188 { 00189 freeMem(el->tSequence[0]); 00190 freeMem(el->tSequence); 00191 } 00192 freez(pEl); 00193 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void pslFreeList | ( | struct psl ** | pList | ) |
Definition at line 195 of file psl.c.
References psl::next, and pslFree().
Referenced by aliLines().
00197 { 00198 struct psl *el, *next; 00199 00200 for (el = *pList; el != NULL; el = next) 00201 { 00202 next = el->next; 00203 pslFree(&el); 00204 } 00205 *pList = NULL; 00206 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct psl* pslFromAlign | ( | char * | qName, | |
| int | qSize, | |||
| int | qStart, | |||
| int | qEnd, | |||
| char * | qString, | |||
| char * | tName, | |||
| int | tSize, | |||
| int | tStart, | |||
| int | tEnd, | |||
| char * | tString, | |||
| char * | strand, | |||
| unsigned | options | |||
| ) | [read] |
Definition at line 1834 of file psl.c.
References accumCounts(), addBlock(), AllocVar, errAbort(), FALSE, pslFree(), pslNew(), psl::qEnd, psl::qSize, psl::qStart, reverseIntRange(), psl::tEnd, trimAlignment(), TRUE, psl::tSize, and psl::tStart.
01840 { 01841 /* Note, the unit tests for these programs exercise this function: 01842 * hg/embossToPsl 01843 * hg/mouseStuff/axtToPsl 01844 * hg/spideyToPsl 01845 * utils/est2genomeToPsl 01846 */ 01847 01848 int blockSpace = 16; 01849 struct psl* psl = NULL; 01850 int aliSize = strlen(qString); 01851 boolean eitherInsert = FALSE; /* True if either in insert state. */ 01852 int i, qs,qe,ts,te; 01853 char prevQ = '\0', prevT = '\0'; 01854 AllocVar(psl); 01855 01856 if (strlen(tString) != aliSize) 01857 errAbort("query and target alignment strings are different lengths"); 01858 01859 psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, 01860 strand, blockSpace, 0); 01861 trimAlignment(psl, &qString, &tString, &aliSize); 01862 01863 /* Don't create if either query or target is zero length after trim */ 01864 if ((psl->qStart == psl->qEnd) || (psl->tStart == psl->tEnd)) 01865 { 01866 pslFree(&psl); 01867 return NULL; 01868 } 01869 01870 /* Get range of alignment in strand-specified coordinates */ 01871 qs = psl->qStart; 01872 qe = psl->qEnd; 01873 if (strand[0] == '-') 01874 reverseIntRange(&qs, &qe, psl->qSize); 01875 ts = psl->tStart; 01876 te = psl->tEnd; 01877 if (strand[1] == '-') 01878 reverseIntRange(&ts, &te, psl->tSize); 01879 01880 eitherInsert = FALSE; 01881 qe = qs; /* current block coords */ 01882 te = ts; 01883 for (i=0; i<aliSize; ++i) 01884 { 01885 char q = qString[i]; 01886 char t = tString[i]; 01887 if ((q == '-') && (t == '-')) 01888 { 01889 continue; /* nothing in this column, just ignore it */ 01890 } 01891 else if ((q == '-') || (t == '-')) 01892 { 01893 /* insert in one of the columns */ 01894 if (!eitherInsert) 01895 { 01896 /* end of a block */ 01897 addBlock(psl, qs, qe, ts, te, &blockSpace); 01898 eitherInsert = TRUE; 01899 } 01900 if (q != '-') 01901 qe += 1; 01902 if (t != '-') 01903 te += 1; 01904 } 01905 else 01906 { 01907 /* columns aligned */ 01908 if (eitherInsert) 01909 { 01910 /* start new block */ 01911 qs = qe; 01912 ts = te; 01913 eitherInsert = FALSE; 01914 } 01915 qe += 1; 01916 te += 1; 01917 } 01918 accumCounts(psl, prevQ, prevT, q, t, options); 01919 prevQ = q; /* will not include skipped empty columns */ 01920 prevT = t; 01921 } 01922 addBlock(psl, qs, qe, ts, te, &blockSpace); 01923 return psl; 01924 }
Here is the call graph for this function:

| struct psl* pslFromFakeFfAli | ( | struct ffAli * | ff, | |
| DNA * | needle, | |||
| DNA * | haystack, | |||
| char | strand, | |||
| char * | qName, | |||
| int | qSize, | |||
| char * | tName, | |||
| int | tSize | |||
| ) | [read] |
Definition at line 880 of file psl.c.
References AllocArray, AllocVar, psl::blockCount, psl::blockSizes, cloneString(), ffAliCount(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, psl::qEnd, psl::qName, psl::qSize, psl::qStart, psl::qStarts, reverseIntRange(), ffAli::right, psl::strand, psl::tEnd, psl::tName, psl::tSize, psl::tStart, and psl::tStarts.
00886 { 00887 struct psl *psl; 00888 unsigned *blockSizes; 00889 unsigned *qStarts; 00890 unsigned *tStarts; 00891 int blockCount; 00892 int i; 00893 int nStart, hStart; 00894 int nEnd, hEnd; 00895 00896 AllocVar(psl); 00897 psl->blockCount = blockCount = ffAliCount(ff); 00898 psl->blockSizes = AllocArray(blockSizes, blockCount); 00899 psl->qStarts = AllocArray(qStarts, blockCount); 00900 psl->tStarts = AllocArray(tStarts, blockCount); 00901 psl->qName = cloneString(qName); 00902 psl->qSize = qSize; 00903 psl->tName = cloneString(tName); 00904 psl->tSize = tSize; 00905 psl->strand[0] = strand; 00906 00907 for (i=0; i<blockCount; ++i) 00908 { 00909 nStart = ff->nStart - needle; 00910 nEnd = ff->nEnd - needle; 00911 hStart = ff->hStart - haystack; 00912 hEnd = ff->hEnd - haystack; 00913 blockSizes[i] = nEnd - nStart; 00914 qStarts[i] = nStart; 00915 tStarts[i] = hStart; 00916 if (i == 0) 00917 { 00918 psl->qStart = nStart; 00919 psl->tStart = hStart; 00920 } 00921 if (i == blockCount-1) 00922 { 00923 psl->qEnd = nEnd; 00924 psl->tEnd = hEnd; 00925 } 00926 ff = ff->right; 00927 } 00928 if (strand == '-') 00929 { 00930 reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize); 00931 } 00932 return psl; 00933 }
Here is the call graph for this function:

| char* pslGetCreateSql | ( | char * | table, | |
| unsigned | options, | |||
| int | tNameIdxLen | |||
| ) |
Definition at line 1473 of file psl.c.
References cloneString(), createString, dyStringFree, dyStringPrintf(), errAbort(), indexString, newDyString(), PSL_TNAMEIX, PSL_WITH_BIN, PSL_XA_FORMAT, safef(), and dyString::string.
01479 { 01480 struct dyString *sqlCmd = newDyString(2048); 01481 char *sqlCmdStr; 01482 char binIx[32]; 01483 01484 binIx[0] = '\0'; 01485 01486 /* check and default tNameIdxLen */ 01487 if ((tNameIdxLen > 0) && !(options & PSL_TNAMEIX)) 01488 errAbort("pslGetCreateSql: must specify PSL_TNAMEIX with tNameIdxLen > 0"); 01489 if ((options & PSL_TNAMEIX) && (tNameIdxLen == 0)) 01490 tNameIdxLen = 8; 01491 01492 /* setup tName and bin index fields */ 01493 if (options & PSL_WITH_BIN) 01494 { 01495 if (options & PSL_TNAMEIX) 01496 safef(binIx, sizeof(binIx), "INDEX(tName(%d),bin),\n", tNameIdxLen); 01497 else 01498 safef(binIx, sizeof(binIx), "INDEX(bin),\n"); 01499 } 01500 dyStringPrintf(sqlCmd, createString, table, 01501 ((options & PSL_WITH_BIN) ? "bin smallint unsigned not null,\n" : "")); 01502 if (options & PSL_XA_FORMAT) 01503 { 01504 dyStringPrintf(sqlCmd, "qSeq longblob not null,\n"); 01505 dyStringPrintf(sqlCmd, "tSeq longblob not null,\n"); 01506 } 01507 dyStringPrintf(sqlCmd, indexString, binIx); 01508 sqlCmdStr = cloneString(sqlCmd->string); 01509 dyStringFree(&sqlCmd); 01510 return sqlCmdStr; 01511 }
Here is the call graph for this function:

| void pslGrow | ( | struct psl * | psl, | |
| int * | blockSpacePtr | |||
| ) |
Definition at line 1956 of file psl.c.
References psl::blockSizes, ExpandArray, psl::qSequence, psl::qStarts, psl::tSequence, and psl::tStarts.
Referenced by addBlock(), and addPslBlock().
01960 { 01961 int blockSpace = *blockSpacePtr; 01962 int newSpace = 2 * blockSpace; 01963 ExpandArray(psl->blockSizes, blockSpace, newSpace); 01964 ExpandArray(psl->qStarts, blockSpace, newSpace); 01965 ExpandArray(psl->tStarts, blockSpace, newSpace); 01966 if (psl->qSequence != NULL) 01967 { 01968 ExpandArray(psl->qSequence, blockSpace, newSpace); 01969 ExpandArray(psl->tSequence, blockSpace, newSpace); 01970 } 01971 *blockSpacePtr = newSpace; 01972 }
Here is the caller graph for this function:

Definition at line 1043 of file psl.c.
References psl::blockCount, blockSize, psl::blockSizes, dnaSeq::dna, FALSE, intronOrientation(), psl::qStarts, reverseIntRange(), psl::strand, TRUE, psl::tSize, and psl::tStarts.
01046 { 01047 int blockCount = psl->blockCount, i; 01048 unsigned *tStarts = psl->tStarts; 01049 unsigned *blockSizes = psl->blockSizes; 01050 unsigned *qStarts = psl->qStarts; 01051 int blockSize, start, end; 01052 DNA *dna = seq->dna; 01053 01054 for (i=1; i<blockCount; ++i) 01055 { 01056 blockSize = blockSizes[i-1]; 01057 start = qStarts[i-1]+blockSize; 01058 end = qStarts[i]; 01059 if (start == end) 01060 { 01061 start = tStarts[i-1] + blockSize; 01062 end = tStarts[i]; 01063 if (psl->strand[1] == '-') 01064 reverseIntRange(&start, &end, psl->tSize); 01065 start -= seqOffset; 01066 end -= seqOffset; 01067 if (intronOrientation(dna+start, dna+end) != 0) 01068 return TRUE; 01069 } 01070 } 01071 return FALSE; 01072 }
Here is the call graph for this function:

Definition at line 1029 of file psl.c.
References pslWeightedIntronOrientation().
01034 { 01035 int intronDir = pslWeightedIntronOrientation(psl, genoSeq, offset); 01036 if (intronDir < 0) 01037 intronDir = -1; 01038 else if (intronDir > 0) 01039 intronDir = 1; 01040 return intronDir; 01041 }
Here is the call graph for this function:

| boolean pslIsProtein | ( | const struct psl * | psl | ) |
Definition at line 790 of file psl.c.
References psl::blockCount, psl::blockSizes, psl::strand, psl::tEnd, psl::tSize, psl::tStart, and psl::tStarts.
Referenced by pslCalcMilliBad(), pslCheck(), pslScore(), and pslTransMap().
00792 { 00793 int lastBlock = psl->blockCount - 1; 00794 00795 return (((psl->strand[1] == '+' ) && 00796 (psl->tEnd == psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])) || 00797 ((psl->strand[1] == '-') && 00798 (psl->tStart == (psl->tSize-(psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock]))))); 00799 }
Here is the caller graph for this function:

| static void pslLabelColumns | ( | FILE * | f | ) | [static] |
Definition at line 492 of file psl.c.
Referenced by pslWriteHead(), and pslxWriteHead().
00494 { 00495 fputs("\n" 00496 "match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ \tQ \tQ \tQ \tT \tT \tT \tT \tblock\tblockSizes \tqStarts\t tStarts\n" 00497 " \tmatch\tmatch\t \tcount\tbases\tcount\tbases\t \tname \tsize\tstart\tend\tname \tsize\tstart\tend\tcount\n" 00498 "---------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 00499 f); 00500 }
Here is the caller graph for this function:

| struct psl* pslLoad | ( | char ** | row | ) | [read] |
Definition at line 67 of file psl.c.
References AllocVar, psl::blockCount, psl::blockSizes, cloneString(), psl::match, psl::misMatch, psl::nCount, psl::qBaseInsert, psl::qEnd, psl::qName, psl::qNumInsert, psl::qSize, psl::qStart, psl::qStarts, psl::repMatch, sqlSigned(), sqlUnsigned(), sqlUnsignedDynamicArray(), psl::strand, psl::tBaseInsert, psl::tEnd, psl::tName, psl::tNumInsert, psl::tSize, psl::tStart, and psl::tStarts.
Referenced by pslNext(), pslxLoad(), and readPslToBinKeeper().
00070 { 00071 struct psl *ret; 00072 int sizeOne; 00073 00074 AllocVar(ret); 00075 ret->blockCount = sqlUnsigned(row[17]); 00076 ret->match = sqlUnsigned(row[0]); 00077 ret->misMatch = sqlUnsigned(row[1]); 00078 ret->repMatch = sqlUnsigned(row[2]); 00079 ret->nCount = sqlUnsigned(row[3]); 00080 ret->qNumInsert = sqlUnsigned(row[4]); 00081 ret->qBaseInsert = sqlSigned(row[5]); 00082 ret->tNumInsert = sqlUnsigned(row[6]); 00083 ret->tBaseInsert = sqlSigned(row[7]); 00084 strcpy(ret->strand, row[8]); 00085 ret->qName = cloneString(row[9]); 00086 ret->qSize = sqlUnsigned(row[10]); 00087 ret->qStart = sqlUnsigned(row[11]); 00088 ret->qEnd = sqlUnsigned(row[12]); 00089 ret->tName = cloneString(row[13]); 00090 ret->tSize = sqlUnsigned(row[14]); 00091 ret->tStart = sqlUnsigned(row[15]); 00092 ret->tEnd = sqlUnsigned(row[16]); 00093 sqlUnsignedDynamicArray(row[18], &ret->blockSizes, &sizeOne); 00094 if (sizeOne != ret->blockCount) 00095 { 00096 printf("sizeOne bloxksizes %d bs %d block=%s\n",sizeOne, ret->blockCount,row[18]); 00097 } 00098 assert(sizeOne == ret->blockCount); 00099 sqlUnsignedDynamicArray(row[19], &ret->qStarts, &sizeOne); 00100 if (sizeOne != ret->blockCount) 00101 { 00102 printf("sizeOne qStarts %d bs %d\n",sizeOne, ret->blockCount); 00103 } 00104 assert(sizeOne == ret->blockCount); 00105 sqlUnsignedDynamicArray(row[20], &ret->tStarts, &sizeOne); 00106 if (sizeOne != ret->blockCount) 00107 { 00108 printf("sizeOne tStarts %d bs %d\n",sizeOne, ret->blockCount); 00109 } 00110 assert(sizeOne == ret->blockCount); 00111 return ret; 00112 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct psl* pslLoadAll | ( | char * | fileName | ) | [read] |
Definition at line 411 of file psl.c.
References lineFileClose(), pslFileOpen(), pslNext(), slAddHead, and slReverse().
00413 { 00414 struct lineFile *lf = pslFileOpen(fileName); 00415 struct psl *pslList = NULL, *psl; 00416 while ((psl = pslNext(lf)) != NULL) 00417 { 00418 slAddHead(&pslList, psl); 00419 } 00420 slReverse(&pslList); 00421 lineFileClose(&lf); 00422 return pslList; 00423 }
Here is the call graph for this function:

Definition at line 757 of file psl.c.
References lm, lmAlloc(), lmCloneString(), sqlSigned(), sqlUnsigned(), and sqlUnsignedArray().
Referenced by loadPsl(), and pslxLoadLm().
00759 { 00760 struct psl *ret; 00761 00762 ret = lmAlloc(lm, sizeof(*ret)); 00763 ret->blockCount = sqlUnsigned(row[17]); 00764 ret->match = sqlUnsigned(row[0]); 00765 ret->misMatch = sqlUnsigned(row[1]); 00766 ret->repMatch = sqlUnsigned(row[2]); 00767 ret->nCount = sqlUnsigned(row[3]); 00768 ret->qNumInsert = sqlUnsigned(row[4]); 00769 ret->qBaseInsert = sqlSigned(row[5]); 00770 ret->tNumInsert = sqlUnsigned(row[6]); 00771 ret->tBaseInsert = sqlSigned(row[7]); 00772 strcpy(ret->strand, row[8]); 00773 ret->qName = lmCloneString(lm,row[9]); 00774 ret->qSize = sqlUnsigned(row[10]); 00775 ret->qStart = sqlUnsigned(row[11]); 00776 ret->qEnd = sqlUnsigned(row[12]); 00777 ret->tName = lmCloneString(lm, row[13]); 00778 ret->tSize = sqlUnsigned(row[14]); 00779 ret->tStart = sqlUnsigned(row[15]); 00780 ret->tEnd = sqlUnsigned(row[16]); 00781 ret->blockSizes = lmAlloc(lm, sizeof(ret->blockSizes[0]) * ret->blockCount); 00782 sqlUnsignedArray(row[18], ret->blockSizes, ret->blockCount); 00783 ret->qStarts = lmAlloc(lm, sizeof(ret->qStarts[0]) * ret->blockCount); 00784 sqlUnsignedArray(row[19], ret->qStarts, ret->blockCount); 00785 ret->tStarts = lmAlloc(lm, sizeof(ret->tStarts[0]) * ret->blockCount); 00786 sqlUnsignedArray(row[20], ret->tStarts, ret->blockCount); 00787 return ret; 00788 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct psl* pslNew | ( | char * | qName, | |
| unsigned | qSize, | |||
| int | qStart, | |||
| int | qEnd, | |||
| char * | tName, | |||
| unsigned | tSize, | |||
| int | tStart, | |||
| int | tEnd, | |||
| char * | strand, | |||
| unsigned | blockSpace, | |||
| unsigned | opts | |||
| ) | [read] |
Definition at line 1926 of file psl.c.
References AllocArray, AllocVar, psl::blockSizes, cloneString(), PSL_XA_FORMAT, psl::qEnd, psl::qName, psl::qSequence, psl::qSize, psl::qStart, psl::qStarts, psl::strand, psl::tEnd, psl::tName, psl::tSequence, psl::tSize, psl::tStart, and psl::tStarts.
Referenced by createMappedPsl(), and pslFromAlign().
01932 { 01933 struct psl *psl; 01934 AllocVar(psl); 01935 assert(blockSpace > 0); 01936 psl->qName = cloneString(qName); 01937 psl->qSize = qSize; 01938 psl->qStart = qStart; 01939 psl->qEnd = qEnd; 01940 psl->tName = cloneString(tName); 01941 psl->tSize = tSize; 01942 psl->tStart = tStart; 01943 psl->tEnd = tEnd; 01944 strncpy(psl->strand, strand, 2); 01945 AllocArray(psl->blockSizes, blockSpace); 01946 AllocArray(psl->qStarts, blockSpace); 01947 AllocArray(psl->tStarts, blockSpace); 01948 if (opts & PSL_XA_FORMAT) 01949 { 01950 AllocArray(psl->qSequence, blockSpace); 01951 AllocArray(psl->tSequence, blockSpace); 01952 } 01953 return psl; 01954 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 708 of file psl.c.
References chopLine, errAbort(), lineFile::fileName, lineFileNextReal(), lineFile::lineIx, needMoreMem(), pslLoad(), and pslxLoad().
Referenced by aliLines(), doDetailLine(), and pslLoadAll().
00711 { 00712 char *line; 00713 int lineSize; 00714 char *words[32]; 00715 int wordCount; 00716 static int lineAlloc = 0; 00717 static char *chopBuf = NULL; 00718 00719 if (!lineFileNextReal(lf, &line)) 00720 { 00721 return NULL; 00722 } 00723 lineSize = strlen(line); 00724 if (lineSize >= lineAlloc) 00725 { 00726 lineAlloc = lineSize+256; 00727 chopBuf = needMoreMem(chopBuf, 0, lineAlloc); 00728 } 00729 memcpy(chopBuf, line, lineSize+1); 00730 wordCount = chopLine(chopBuf, words); 00731 if (wordCount == 21) 00732 { 00733 return pslLoad(words); 00734 } 00735 if (wordCount == 23) 00736 { 00737 return pslxLoad(words); 00738 } 00739 else 00740 { 00741 errAbort("Bad line %d of %s wordCount is %d instead of 21 or 23\n", lf->lineIx, lf->fileName, wordCount); 00742 return NULL; 00743 } 00744 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int pslOrientation | ( | struct psl * | psl | ) |
Definition at line 979 of file psl.c.
References psl::strand.
Referenced by pslTailSizes().
00981 { 00982 if (psl->strand[1] != '\0') 00983 { 00984 /* translated blat */ 00985 if (psl->strand[0] != psl->strand[1]) 00986 return -1; 00987 else 00988 return 1; 00989 } 00990 else 00991 { 00992 if (psl->strand[0] == '-') 00993 return -1; 00994 else 00995 return 1; 00996 } 00997 }
Here is the caller graph for this function:

| void pslOutFormat | ( | struct psl * | el, | |
| FILE * | f, | |||
| char | sep, | |||
| char | lastSep | |||
| ) |
Definition at line 355 of file psl.c.
References errAbort(), psl::match, psl::misMatch, psl::nCount, psl::qEnd, psl::qName, psl::qSize, psl::qStart, psl::repMatch, startsWith(), psl::strand, psl::tEnd, psl::tName, and psl::tStart.
00360 { 00361 const char *headers[] = {"Matches", "Mismatches", "Matches in repeats", "Number of N bases", "Query name", "Size", "Start", "End", "Chromosome", "Strand", "Start", "End"}; 00362 char *hformat = "<B>%s:</B> "; /* string for formatted print for headers */ 00363 char *uformat = "<B>%s:</B> %u%c"; /* string for formatted print for unsigned variable */ 00364 char *targName; 00365 00366 fprintf(f, uformat, headers[0], el->match, sep); 00367 fprintf(f, uformat, headers[1], el->misMatch, sep); 00368 fprintf(f, uformat, headers[2], el->repMatch, sep); 00369 fprintf(f, uformat, headers[3], el->nCount, sep); 00370 00371 fprintf(f, hformat, headers[4]); 00372 if (sep == ',') fputc('"',f); 00373 fprintf(f, "%s", el->qName); 00374 if (sep == ',') fputc('"',f); 00375 fputc(sep,f); 00376 00377 fprintf(f, uformat, headers[5], el->qSize, sep); 00378 fprintf(f, uformat, headers[6], el->qStart, sep); 00379 fprintf(f, uformat, headers[7], el->qEnd, sep); 00380 00381 fprintf(f, hformat, headers[8]); 00382 if (sep == ',') fputc('"',f); 00383 /* skip leading 'chr' in string to get only chromosome part */ 00384 targName = el->tName; 00385 if (startsWith("chr", el->tName)) 00386 targName += 3; 00387 fprintf(f, "%s", targName); 00388 00389 if (sep == ',') fputc('"',f); 00390 fputc(sep,f); 00391 00392 fprintf(f, hformat, headers[9]); 00393 if (sep == ',') fputc('"',f); 00394 fprintf(f, "%s", el->strand); 00395 if (sep == ',') fputc('"',f); 00396 fputc(sep,f); 00397 00398 fprintf(f, uformat, headers[10], el->tStart, sep); 00399 fprintf(f, uformat, headers[11], el->tEnd, sep); 00400 00401 fputc(lastSep,f); 00402 00403 if (ferror(f)) 00404 { 00405 perror("Error writing psl file\n"); 00406 errAbort("\n"); 00407 } 00408 00409 }
Here is the call graph for this function:

| void pslOutput | ( | struct psl * | el, | |
| FILE * | f, | |||
| char | sep, | |||
| char | lastSep | |||
| ) |
Definition at line 208 of file psl.c.
References psl::blockCount, psl::blockSizes, psl::match, psl::misMatch, psl::nCount, psl::qBaseInsert, psl::qEnd, psl::qName, psl::qNumInsert, psl::qSequence, psl::qSize, psl::qStart, psl::qStarts, psl::repMatch, psl::strand, psl::tBaseInsert, psl::tEnd, psl::tName, psl::tNumInsert, psl::tSequence, psl::tSize, psl::tStart, and psl::tStarts.
00210 { 00211 int i; 00212 fprintf(f, "%u", el->match); 00213 fputc(sep,f); 00214 fprintf(f, "%u", el->misMatch); 00215 fputc(sep,f); 00216 fprintf(f, "%u", el->repMatch); 00217 fputc(sep,f); 00218 fprintf(f, "%u", el->nCount); 00219 fputc(sep,f); 00220 fprintf(f, "%u", el->qNumInsert); 00221 fputc(sep,f); 00222 fprintf(f, "%d", el->qBaseInsert); 00223 fputc(sep,f); 00224 fprintf(f, "%u", el->tNumInsert); 00225 fputc(sep,f); 00226 fprintf(f, "%d", el->tBaseInsert); 00227 fputc(sep,f); 00228 if (sep == ',') fputc('"',f); 00229 fprintf(f, "%s", el->strand); 00230 if (sep == ',') fputc('"',f); 00231 fputc(sep,f); 00232 if (sep == ',') fputc('"',f); 00233 fprintf(f, "%s", el->qName); 00234 if (sep == ',') fputc('"',f); 00235 fputc(sep,f); 00236 fprintf(f, "%u", el->qSize); 00237 fputc(sep,f); 00238 fprintf(f, "%u", el->qStart); 00239 fputc(sep,f); 00240 fprintf(f, "%u", el->qEnd); 00241 fputc(sep,f); 00242 if (sep == ',') fputc('"',f); 00243 fprintf(f, "%s", el->tName); 00244 if (sep == ',') fputc('"',f); 00245 fputc(sep,f); 00246 fprintf(f, "%u", el->tSize); 00247 fputc(sep,f); 00248 fprintf(f, "%u", el->tStart); 00249 fputc(sep,f); 00250 fprintf(f, "%u", el->tEnd); 00251 fputc(sep,f); 00252 fprintf(f, "%u", el->blockCount); 00253 fputc(sep,f); 00254 if (sep == ',') fputc('{',f); 00255 for (i=0; i<el->blockCount; ++i) 00256 { 00257 fprintf(f, "%u", el->blockSizes[i]); 00258 fputc(',', f); 00259 } 00260 if (sep == ',') fputc('}',f); 00261 fputc(sep,f); 00262 if (sep == ',') fputc('{',f); 00263 for (i=0; i<el->blockCount; ++i) 00264 { 00265 fprintf(f, "%u", el->qStarts[i]); 00266 fputc(',', f); 00267 } 00268 if (sep == ',') fputc('}',f); 00269 fputc(sep,f); 00270 if (sep == ',') fputc('{',f); 00271 for (i=0; i<el->blockCount; ++i) 00272 { 00273 fprintf(f, "%u", el->tStarts[i]); 00274 fputc(',', f); 00275 } 00276 if (sep == ',') fputc('}',f); 00277 if (el->qSequence) 00278 { 00279 fputc(sep,f); 00280 if (sep == ',') fputc('{',f); 00281 for (i=0; i<el->blockCount; ++i) 00282 { 00283 fprintf(f, "%s", el->qSequence[i]); 00284 fputc(',', f); 00285 } 00286 if (sep == ',') fputc('}',f); 00287 fputc(sep,f); 00288 if (sep == ',') fputc('{',f); 00289 for (i=0; i<el->blockCount; ++i) 00290 { 00291 fprintf(f, "%s", el->tSequence[i]); 00292 fputc(',', f); 00293 } 00294 if (sep == ',') fputc('}',f); 00295 } 00296 00297 fputc(lastSep,f); 00298 if (ferror(f)) 00299 { 00300 perror("Error writing psl file\n"); 00301 errAbort("\n"); 00302 } 00303 }
| void pslOutputShort | ( | struct psl * | el, | |
| FILE * | f, | |||
| char | sep, | |||
| char | lastSep | |||
| ) |
Definition at line 307 of file psl.c.
References psl::blockCount, errAbort(), psl::match, psl::misMatch, psl::qBaseInsert, psl::qEnd, psl::qName, psl::qNumInsert, psl::qStart, psl::repMatch, psl::strand, psl::tBaseInsert, psl::tEnd, psl::tName, psl::tNumInsert, and psl::tStart.
00309 { 00310 fprintf(f, "%u", el->match); 00311 fputc(sep,f); 00312 fprintf(f, "%u", el->misMatch); 00313 fputc(sep,f); 00314 fprintf(f, "%u", el->repMatch); 00315 fputc(sep,f); 00316 fprintf(f, "%u", el->qNumInsert); 00317 fputc(sep,f); 00318 fprintf(f, "%d", el->qBaseInsert); 00319 fputc(sep,f); 00320 fprintf(f, "%u", el->tNumInsert); 00321 fputc(sep,f); 00322 fprintf(f, "%d", el->tBaseInsert); 00323 fputc(sep,f); 00324 if (sep == ',') fputc('"',f); 00325 fprintf(f, "%s", el->strand); 00326 if (sep == ',') fputc('"',f); 00327 fputc(sep,f); 00328 if (sep == ',') fputc('"',f); 00329 fprintf(f, "%s", el->qName); 00330 if (sep == ',') fputc('"',f); 00331 fputc(sep,f); 00332 fprintf(f, "%u", el->qStart); 00333 fputc(sep,f); 00334 fprintf(f, "%u", abs(el->qEnd - el->qStart)); 00335 fputc(sep,f); 00336 if (sep == ',') fputc('"',f); 00337 fprintf(f, "%s", el->tName); 00338 if (sep == ',') fputc('"',f); 00339 fputc(sep,f); 00340 fprintf(f, "%u", el->tStart); 00341 fputc(sep,f); 00342 fprintf(f, "%u", abs(el->tEnd - el->tStart)); 00343 fputc(sep,f); 00344 fprintf(f, "%u", el->blockCount); 00345 fputc(sep,f); 00346 if (sep == ',') fputc('{',f); 00347 fputc(lastSep,f); 00348 if (ferror(f)) 00349 { 00350 perror("Error writing psl file\n"); 00351 errAbort("\n"); 00352 } 00353 }
Here is the call graph for this function:

Definition at line 1974 of file psl.c.
References psl::blockCount, psl::blockSizes, rangeTreeOverlapSize(), reverseIntRange(), psl::strand, psl::tSize, and psl::tStarts.
01976 { 01977 int i; 01978 int overlap = 0; 01979 boolean isRc = (psl->strand[1] == '-'); 01980 for (i=0; i<psl->blockCount; ++i) 01981 { 01982 int start = psl->tStarts[i]; 01983 int end = start + psl->blockSizes[i]; 01984 if (isRc) 01985 reverseIntRange(&start, &end, psl->tSize); 01986 overlap += rangeTreeOverlapSize(rangeTree, start, end); 01987 } 01988 return overlap; 01989 }
Here is the call graph for this function:

| void pslRc | ( | struct psl * | psl | ) |
Definition at line 1149 of file psl.c.
References psl::blockCount, psl::blockSizes, psl::qSequence, psl::qSize, psl::qStarts, rcSeqs(), reverseUnsigned(), psl::strand, psl::tSequence, psl::tSize, and psl::tStarts.
Referenced by adjustOrientation(), and pslTransMap().
01151 { 01152 int i; 01153 01154 /* swap strand, forcing target to have an explict strand */ 01155 psl->strand[0] = (psl->strand[0] != '-') ? '-' : '+'; 01156 psl->strand[1] = (psl->strand[1] != '-') ? '-' : '+'; 01157 01158 for (i = 0; i < psl->blockCount; i++) 01159 { 01160 psl->qStarts[i] = psl->qSize - (psl->qStarts[i] + psl->blockSizes[i]); 01161 psl->tStarts[i] = psl->tSize - (psl->tStarts[i] + psl->blockSizes[i]); 01162 } 01163 reverseUnsigned(psl->tStarts, psl->blockCount); 01164 reverseUnsigned(psl->qStarts, psl->blockCount); 01165 reverseUnsigned(psl->blockSizes, psl->blockCount); 01166 if (psl->qSequence != NULL) 01167 { 01168 rcSeqs(psl->qSequence, psl->blockCount, psl->blockSizes); 01169 rcSeqs(psl->tSequence, psl->blockCount, psl->blockSizes); 01170 } 01171 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void pslRcBoth | ( | struct psl * | psl | ) |
Definition at line 1130 of file psl.c.
References psl::blockCount, psl::blockSizes, psl::qSize, psl::qStarts, reverseIntRange(), reverseUnsigned(), psl::tEnd, psl::tSize, psl::tStart, and psl::tStarts.
Referenced by pslGenoShowAlignment(), and pslShowAlignment().
01133 { 01134 int tSize = psl->tSize, qSize = psl->qSize; 01135 int blockCount = psl->blockCount, i; 01136 unsigned *tStarts = psl->tStarts, *qStarts = psl->qStarts, *blockSizes = psl->blockSizes; 01137 01138 reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize); 01139 for (i=0; i<blockCount; ++i) 01140 { 01141 tStarts[i] = (int)tSize - ((int)tStarts[i] + (int)blockSizes[i]); 01142 qStarts[i] = (int)qSize - ((int)qStarts[i] + (int)blockSizes[i]); 01143 } 01144 reverseUnsigned(tStarts, blockCount); 01145 reverseUnsigned(qStarts, blockCount); 01146 reverseUnsigned(blockSizes, blockCount); 01147 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void pslRecalcBounds | ( | struct psl * | psl | ) | [static] |
Definition at line 1281 of file psl.c.
References psl::blockCount, psl::blockSizes, psl::qEnd, psl::qSize, psl::qStart, psl::qStarts, reverseIntRange(), psl::strand, psl::tEnd, psl::tSize, psl::tStart, and psl::tStarts.
Referenced by pslTrimToQueryRange(), and pslTrimToTargetRange().
01284 { 01285 int qStart, qEnd, tStart, tEnd, size; 01286 int last = psl->blockCount - 1; 01287 qStart = psl->qStarts[0]; 01288 tStart = psl->tStarts[0]; 01289 size = psl->blockSizes[last]; 01290 qEnd = psl->qStarts[last] + size; 01291 tEnd = psl->tStarts[last] + size; 01292 if (psl->strand[0] == '-') 01293 reverseIntRange(&qStart, &qEnd, psl->qSize); 01294 if (psl->strand[1] == '-') 01295 reverseIntRange(&tStart, &tEnd, psl->tSize); 01296 psl->qStart = qStart; 01297 psl->qEnd = qEnd; 01298 psl->tStart = tStart; 01299 psl->tEnd = tEnd; 01300 }
Here is the call graph for this function:

Here is the caller graph for this function:

| int pslScore | ( | const struct psl * | psl | ) |
Definition at line 834 of file psl.c.
References psl::match, psl::misMatch, pslIsProtein(), psl::qNumInsert, psl::repMatch, and psl::tNumInsert.
Referenced by aliLines(), pslCmpQueryScore(), pslCmpScore(), pslCmpScoreDesc(), and pslCmpTargetScore().
00836 { 00837 int sizeMul = pslIsProtein(psl) ? 3 : 1; 00838 00839 return sizeMul * (psl->match + ( psl->repMatch>>1)) - 00840 sizeMul * psl->misMatch - psl->qNumInsert - psl->tNumInsert; 00841 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void pslSwap | ( | struct psl * | psl, | |
| boolean | noRc | |||
| ) |
Definition at line 1218 of file psl.c.
References psl::qBaseInsert, psl::qEnd, psl::qName, psl::qNumInsert, psl::qSize, psl::qStart, psl::strand, swapBlocks(), swapRCBlocks(), swapVars, psl::tBaseInsert, psl::tEnd, psl::tName, psl::tNumInsert, psl::tSize, and psl::tStart.
01221 { 01222 int itmp; 01223 unsigned utmp; 01224 char ctmp, *stmp; 01225 swapVars(psl->qBaseInsert, psl->tBaseInsert, utmp); 01226 swapVars(psl->tNumInsert, psl->qNumInsert, utmp); 01227 swapVars(psl->qName, psl->tName, stmp); 01228 swapVars(psl->qSize, psl->tSize, utmp); 01229 swapVars(psl->qStart, psl->tStart, itmp); 01230 swapVars(psl->qEnd, psl->tEnd, itmp); 01231 01232 /* handle strand and block copy */ 01233 if (psl->strand[1] != '\0') 01234 { 01235 /* translated */ 01236 swapVars(psl->strand[0], psl->strand[1], ctmp); 01237 swapBlocks(psl); 01238 } 01239 else if (noRc) 01240 { 01241 /* untranslated with no reverse complement */ 01242 psl->strand[1] = psl->strand[0]; 01243 psl->strand[0] = '+'; 01244 swapBlocks(psl); 01245 } 01246 else 01247 { 01248 /* untranslated */ 01249 if (psl->strand[0] == '+') 01250 swapBlocks(psl); 01251 else 01252 swapRCBlocks(psl); 01253 } 01254 }
Here is the call graph for this function:

| void pslTailSizes | ( | struct psl * | psl, | |
| int * | retStartTail, | |||
| int * | retEndTail | |||
| ) |
Definition at line 1074 of file psl.c.
References min, pslOrientation(), psl::qEnd, psl::qSize, psl::qStart, psl::tEnd, psl::tSize, and psl::tStart.
01076 { 01077 int orientation = pslOrientation(psl); 01078 int qFloppyStart, qFloppyEnd; 01079 int tFloppyStart, tFloppyEnd; 01080 01081 if (orientation > 0) 01082 { 01083 qFloppyStart = psl->qStart; 01084 qFloppyEnd = psl->qSize - psl->qEnd; 01085 } 01086 else 01087 { 01088 qFloppyStart = psl->qSize - psl->qEnd; 01089 qFloppyEnd = psl->qStart; 01090 } 01091 tFloppyStart = psl->tStart; 01092 tFloppyEnd = psl->tSize - psl->tEnd; 01093 *retStartTail = min(qFloppyStart, tFloppyStart); 01094 *retEndTail = min(qFloppyEnd, tFloppyEnd); 01095 }
Here is the call graph for this function:

| void pslTargetOffset | ( | struct psl * | psl, | |
| int | offset | |||
| ) |
Definition at line 1256 of file psl.c.
References psl::blockCount, psl::tEnd, psl::tStart, and psl::tStarts.
01258 { 01259 int i, blockCount = psl->blockCount; 01260 unsigned *tStarts = psl->tStarts; 01261 psl->tStart += offset; 01262 psl->tEnd += offset; 01263 for (i=0; i<blockCount; ++i) 01264 tStarts[i] += offset; 01265 }
Definition at line 852 of file psl.c.
References AllocVar, psl::blockCount, psl::blockSizes, ffMakeRightLinks(), ffAli::nStart, psl::qStarts, and psl::tStarts.
00856 { 00857 struct ffAli *ffList = NULL, *ff; 00858 int blockCount = psl->blockCount; 00859 unsigned *blockSizes = psl->blockSizes; 00860 unsigned *qStarts = psl->qStarts; 00861 unsigned *tStarts = psl->tStarts; 00862 int size; 00863 int i; 00864 00865 for (i=0; i<blockCount; ++i) 00866 { 00867 size = blockSizes[i]; 00868 AllocVar(ff); 00869 ff->left = ffList; 00870 ffList = ff; 00871 ff->nStart = ff->nEnd = needle + qStarts[i]; 00872 ff->nEnd += size; 00873 ff->hStart = ff->hEnd = haystack + tStarts[i]; 00874 ff->hEnd += size; 00875 } 00876 ffList = ffMakeRightLinks(ffList); 00877 return ffList; 00878 }
Here is the call graph for this function:

| struct ffAli* pslToFfAli | ( | struct psl * | psl, | |
| struct dnaSeq * | query, | |||
| struct dnaSeq * | target, | |||
| int | targetOffset | |||
| ) | [read] |
Definition at line 935 of file psl.c.
References AllocVar, psl::blockCount, psl::blockSizes, dnaSeq::dna, ffCountGoodEnds(), ffMakeRightLinks(), ffAli::nStart, psl::qStarts, dnaSeq::size, and psl::tStarts.
00939 { 00940 struct ffAli *ffList = NULL, *ff; 00941 DNA *needle = query->dna; 00942 DNA *haystack = target->dna; 00943 int blockCount = psl->blockCount; 00944 unsigned *blockSizes = psl->blockSizes; 00945 unsigned *qStarts = psl->qStarts; 00946 unsigned *tStarts = psl->tStarts; 00947 int size; 00948 int i; 00949 int tMin = targetOffset; 00950 int tMax = targetOffset + target->size; 00951 int tStart, tEnd; 00952 int clipStart, clipEnd, clipOffset, clipSize; 00953 00954 for (i=0; i<blockCount; ++i) 00955 { 00956 clipStart = tStart = tStarts[i]; 00957 size = blockSizes[i]; 00958 clipEnd = tEnd = tStart + size; 00959 if (tStart < tMax && tEnd > tMin) 00960 { 00961 if (clipStart < tMin) clipStart = tMin; 00962 if (clipEnd > tMax) clipEnd = tMax; 00963 clipOffset = clipStart - tStart; 00964 clipSize = clipEnd - clipStart; 00965 AllocVar(ff); 00966 ff->left = ffList; 00967 ffList = ff; 00968 ff->nStart = ff->nEnd = needle + qStarts[i] + clipOffset; 00969 ff->nEnd += clipSize; 00970 ff->hStart = ff->hEnd = haystack + clipStart - targetOffset; 00971 ff->hEnd += clipSize; 00972 } 00973 } 00974 ffList = ffMakeRightLinks(ffList); 00975 ffCountGoodEnds(ffList); 00976 return ffList; 00977 }
Here is the call graph for this function:

Definition at line 1388 of file psl.c.
References AllocArray, AllocVar, psl::blockCount, psl::blockSizes, cloneString(), pslRecalcBounds(), psl::qEnd, psl::qName, psl::qSize, psl::qStart, psl::qStarts, rangeIntersection(), reverseIntRange(), psl::strand, psl::tName, psl::tSize, and psl::tStarts.
01391 { 01392 int newSize; 01393 int oldBlockCount = oldPsl->blockCount; 01394 boolean qIsRc = (oldPsl->strand[0] == '-'); 01395 int newBlockCount = 0, completeBlockCount = 0; 01396 int i; 01397 struct psl *newPsl = NULL; 01398 int qMn = qMin, qMx = qMax; /* qMin/qMax adjusted for strand. */ 01399 01400 /* Deal with case where we're completely trimmed out quickly. */ 01401 newSize = rangeIntersection(oldPsl->qStart, oldPsl->qEnd, qMin, qMax); 01402 if (newSize <= 0) 01403 return NULL; 01404 01405 if (qIsRc) 01406 reverseIntRange(&qMn, &qMx, oldPsl->qSize); 01407 01408 /* Count how many blocks will survive trimming. */ 01409 oldBlockCount = oldPsl->blockCount; 01410 for (i=0; i<oldBlockCount; ++i) 01411 { 01412 int s = oldPsl->qStarts[i]; 01413 int e = s + oldPsl->blockSizes[i]; 01414 int sz = e - s; 01415 int overlap; 01416 if ((overlap = rangeIntersection(s, e, qMn, qMx)) > 0) 01417 ++newBlockCount; 01418 if (overlap == sz) 01419 ++completeBlockCount; 01420 } 01421 01422 if (newBlockCount == 0) 01423 return NULL; 01424 01425 /* Allocate new psl and fill in what we already know. */ 01426 AllocVar(newPsl); 01427 strcpy(newPsl->strand, oldPsl->strand); 01428 newPsl->qName = cloneString(oldPsl->qName); 01429 newPsl->qSize = oldPsl->qSize; 01430 newPsl->tName = cloneString(oldPsl->tName); 01431 newPsl->tSize = oldPsl->tSize; 01432 newPsl->blockCount = newBlockCount; 01433 AllocArray(newPsl->blockSizes, newBlockCount); 01434 AllocArray(newPsl->qStarts, newBlockCount); 01435 AllocArray(newPsl->tStarts, newBlockCount); 01436 01437 /* Fill in blockSizes, qStarts, tStarts with real data. */ 01438 newBlockCount = completeBlockCount = 0; 01439 for (i=0; i<oldBlockCount; ++i) 01440 { 01441 int oldSz = oldPsl->blockSizes[i]; 01442 int sz = oldSz; 01443 int qS = oldPsl->qStarts[i]; 01444 int qE = qS + sz; 01445 int tS = oldPsl->tStarts[i]; 01446 int tE = tS + sz; 01447 if (rangeIntersection(qS, qE, qMn, qMx) > 0) 01448 { 01449 int diff; 01450 if ((diff = (qMn - qS)) > 0) 01451 { 01452 tS += diff; 01453 qS += diff; 01454 sz -= diff; 01455 } 01456 if ((diff = (qE - qMx)) > 0) 01457 { 01458 tE -= diff; 01459 qE -= diff; 01460 sz -= diff; 01461 } 01462 newPsl->qStarts[newBlockCount] = qS; 01463 newPsl->tStarts[newBlockCount] = tS; 01464 newPsl->blockSizes[newBlockCount] = sz; 01465 ++newBlockCount; 01466 if (sz == oldSz) 01467 ++completeBlockCount; 01468 } 01469 } 01470 pslRecalcBounds(newPsl); 01471 return newPsl; 01472 }
Here is the call graph for this function:

Definition at line 1302 of file psl.c.
References AllocArray, AllocVar, psl::blockCount, psl::blockSizes, cloneString(), pslRecalcBounds(), psl::qName, psl::qSize, psl::qStarts, rangeIntersection(), reverseIntRange(), psl::strand, psl::tEnd, psl::tName, psl::tSize, psl::tStart, and psl::tStarts.
01305 { 01306 int newSize; 01307 int oldBlockCount = oldPsl->blockCount; 01308 boolean tIsRc = (oldPsl->strand[1] == '-'); 01309 int newBlockCount = 0, completeBlockCount = 0; 01310 int i; 01311 struct psl *newPsl = NULL; 01312 int tMn = tMin, tMx = tMax; /* tMin/tMax adjusted for strand. */ 01313 01314 /* Deal with case where we're completely trimmed out quickly. */ 01315 newSize = rangeIntersection(oldPsl->tStart, oldPsl->tEnd, tMin, tMax); 01316 if (newSize <= 0) 01317 return NULL; 01318 01319 if (tIsRc) 01320 reverseIntRange(&tMn, &tMx, oldPsl->tSize); 01321 01322 /* Count how many blocks will survive trimming. */ 01323 oldBlockCount = oldPsl->blockCount; 01324 for (i=0; i<oldBlockCount; ++i) 01325 { 01326 int s = oldPsl->tStarts[i]; 01327 int e = s + oldPsl->blockSizes[i]; 01328 int sz = e - s; 01329 int overlap; 01330 if ((overlap = rangeIntersection(s, e, tMn, tMx)) > 0) 01331 ++newBlockCount; 01332 if (overlap == sz) 01333 ++completeBlockCount; 01334 } 01335 01336 if (newBlockCount == 0) 01337 return NULL; 01338 01339 /* Allocate new psl and fill in what we already know. */ 01340 AllocVar(newPsl); 01341 strcpy(newPsl->strand, oldPsl->strand); 01342 newPsl->qName = cloneString(oldPsl->qName); 01343 newPsl->qSize = oldPsl->qSize; 01344 newPsl->tName = cloneString(oldPsl->tName); 01345 newPsl->tSize = oldPsl->tSize; 01346 newPsl->blockCount = newBlockCount; 01347 AllocArray(newPsl->blockSizes, newBlockCount); 01348 AllocArray(newPsl->qStarts, newBlockCount); 01349 AllocArray(newPsl->tStarts, newBlockCount); 01350 01351 /* Fill in blockSizes, qStarts, tStarts with real data. */ 01352 newBlockCount = completeBlockCount = 0; 01353 for (i=0; i<oldBlockCount; ++i) 01354 { 01355 int oldSz = oldPsl->blockSizes[i]; 01356 int sz = oldSz; 01357 int tS = oldPsl->tStarts[i]; 01358 int tE = tS + sz; 01359 int qS = oldPsl->qStarts[i]; 01360 int qE = qS + sz; 01361 if (rangeIntersection(tS, tE, tMn, tMx) > 0) 01362 { 01363 int diff; 01364 if ((diff = (tMn - tS)) > 0) 01365 { 01366 tS += diff; 01367 qS += diff; 01368 sz -= diff; 01369 } 01370 if ((diff = (tE - tMx)) > 0) 01371 { 01372 tE -= diff; 01373 qE -= diff; 01374 sz -= diff; 01375 } 01376 newPsl->qStarts[newBlockCount] = qS; 01377 newPsl->tStarts[newBlockCount] = tS; 01378 newPsl->blockSizes[newBlockCount] = sz; 01379 ++newBlockCount; 01380 if (sz == oldSz) 01381 ++completeBlockCount; 01382 } 01383 } 01384 pslRecalcBounds(newPsl); 01385 return newPsl; 01386 }
Here is the call graph for this function:

Definition at line 999 of file psl.c.
References psl::blockCount, blockSize, psl::blockSizes, dnaSeq::dna, errAbort(), intronOrientation(), psl::qStarts, psl::strand, and psl::tStarts.
Referenced by pslIntronOrientation().
01005 { 01006 int intronDir = 0; 01007 int oneDir; 01008 int i; 01009 DNA *dna = genoSeq->dna; 01010 01011 /* code below doesn't support negative target strand (translated blat) */ 01012 if (psl->strand[1] == '-') 01013 errAbort("pslWeightedIntronOrientation doesn't support a negative target strand"); 01014 01015 for (i=1; i<psl->blockCount; ++i) 01016 { 01017 int iStart, iEnd, blockSize = psl->blockSizes[i-1]; 01018 if (psl->qStarts[i-1] + blockSize == psl->qStarts[i]) 01019 { 01020 iStart = psl->tStarts[i-1] + psl->blockSizes[i-1] - offset; 01021 iEnd = psl->tStarts[i] - offset; 01022 oneDir = intronOrientation(dna+iStart, dna+iEnd); 01023 intronDir += oneDir; 01024 } 01025 } 01026 return intronDir; 01027 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void pslWriteAll | ( | struct psl * | pslList, | |
| char * | fileName, | |||
| boolean | writeHeader | |||
| ) |
Definition at line 516 of file psl.c.
References mustOpen(), psl::next, pslTabOut, and pslWriteHead().
00518 { 00519 FILE *f; 00520 struct psl *psl; 00521 00522 f = mustOpen(fileName, "w"); 00523 if (writeHeader) 00524 pslWriteHead(f); 00525 for (psl = pslList; psl != NULL; psl = psl->next) 00526 pslTabOut(psl, f); 00527 fclose(f); 00528 }
Here is the call graph for this function:

| void pslWriteHead | ( | FILE * | f | ) |
Definition at line 509 of file psl.c.
References pslLabelColumns().
Referenced by aliLines(), pslHead(), and pslWriteAll().
00511 { 00512 fputs("psLayout version 3\n", f); 00513 pslLabelColumns(f); 00514 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void pslxFileOpen | ( | char * | fileName, | |
| enum gfType * | retQueryType, | |||
| enum gfType * | retTargetType, | |||
| struct lineFile ** | retLf | |||
| ) |
Definition at line 530 of file psl.c.
References chopLine, cloneString(), errAbort(), freeMem(), gftDna, gftRna, gfTypeFromName(), lineFileNext(), lineFileOpen(), lineFileReuse(), sameString, startsWith(), TRUE, and warn().
Referenced by pslFileOpen().
00533 { 00534 char *line; 00535 int lineSize; 00536 char *words[30]; 00537 char *version; 00538 int wordCount; 00539 int i; 00540 enum gfType qt = gftRna, tt = gftDna; 00541 struct lineFile *lf = lineFileOpen(fileName, TRUE); 00542 00543 if (!lineFileNext(lf, &line, &lineSize)) 00544 warn("%s is empty", fileName); 00545 else 00546 { 00547 if (startsWith("psLayout version", line)) 00548 { 00549 wordCount = chopLine(line, words); 00550 if (wordCount < 3) 00551 errAbort("%s is not a psLayout file", fileName); 00552 version = words[2]; 00553 if (sameString(version, "3")) 00554 { 00555 } 00556 else if (sameString(version, "4")) 00557 { 00558 qt = gfTypeFromName(words[3]); 00559 tt = gfTypeFromName(words[4]); 00560 } 00561 else 00562 { 00563 errAbort("%s is version %s of psLayout, this program can only handle through version 4", 00564 fileName, version); 00565 } 00566 for (i=0; i<4; ++i) 00567 { 00568 if (!lineFileNext(lf, &line, &lineSize)) 00569 errAbort("%s severely truncated", fileName); 00570 } 00571 } 00572 else 00573 { 00574 char *s = cloneString(line); 00575 while (line != NULL && line[0] == '#') 00576 { 00577 freeMem(s); 00578 lineFileNext(lf, &line, &lineSize); 00579 s = cloneString(line); 00580 } 00581 wordCount = chopLine(s, words); 00582 if (wordCount < 21 || wordCount > 23 || (words[8][0] != '+' && words[8][0] != '-')) 00583 errAbort("%s is not a psLayout file", fileName); 00584 else 00585 lineFileReuse(lf); 00586 freeMem(s); 00587 } 00588 } 00589 *retQueryType = qt; 00590 *retTargetType = tt; 00591 *retLf = lf; 00592 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void pslxFileOpenWithMeta | ( | char * | fileName, | |
| enum gfType * | retQueryType, | |||
| enum gfType * | retTargetType, | |||
| struct lineFile ** | retLf, | |||
| FILE * | f | |||
| ) |
Definition at line 663 of file psl.c.
References FALSE, and pslxFileOpenWithMetaConfig().
Referenced by pslFileOpenWithMeta().
00666 { 00667 pslxFileOpenWithMetaConfig(fileName, FALSE, retQueryType, retTargetType, retLf, f); 00668 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void pslxFileOpenWithMetaConfig | ( | char * | fileName, | |
| bool | isMetaUnique, | |||
| enum gfType * | retQueryType, | |||
| enum gfType * | retTargetType, | |||
| struct lineFile ** | retLf, | |||
| FILE * | f | |||
| ) | [static] |
Definition at line 594 of file psl.c.
References chopLine, cloneString(), errAbort(), FALSE, freeMem(), gftDna, gftRna, gfTypeFromName(), lineFileNext(), lineFileOpen(), lineFileReuse(), lineFileSetMetaDataOutput(), lineFileSetUniqueMetaData(), sameString, startsWith(), TRUE, and warn().
Referenced by pslxFileOpenWithMeta(), and pslxFileOpenWithUniqueMeta().
00597 { 00598 char *line; 00599 int lineSize; 00600 char *words[30]; 00601 char *version; 00602 int wordCount; 00603 int i; 00604 enum gfType qt = gftRna, tt = gftDna; 00605 struct lineFile *lf = lineFileOpen(fileName, TRUE); 00606 00607 lineFileSetMetaDataOutput(lf, f); 00608 if (isMetaUnique) 00609 lineFileSetUniqueMetaData(lf); 00610 if (!lineFileNext(lf, &line, &lineSize)) 00611 warn("%s is empty", fileName); 00612 else 00613 { 00614 if (startsWith("psLayout version", line)) 00615 { 00616 wordCount = chopLine(line, words); 00617 if (wordCount < 3) 00618 errAbort("%s is not a psLayout file", fileName); 00619 version = words[2]; 00620 if (sameString(version, "3")) 00621 { 00622 } 00623 else if (sameString(version, "4")) 00624 { 00625 qt = gfTypeFromName(words[3]); 00626 tt = gfTypeFromName(words[4]); 00627 } 00628 else 00629 { 00630 errAbort("%s is version %s of psLayout, this program can only handle through version 4", 00631 fileName, version); 00632 } 00633 for (i=0; i<4; ++i) 00634 { 00635 if (!lineFileNext(lf, &line, &lineSize)) 00636 errAbort("%s severely truncated", fileName); 00637 } 00638 } 00639 else 00640 { 00641 char *s = cloneString(line); 00642 boolean eof = FALSE; 00643 while ((line[0] == '#') && (!eof)) 00644 { 00645 freeMem(s); 00646 if (!lineFileNext(lf, &line, &lineSize)) 00647 eof = TRUE; 00648 s = cloneString(line); 00649 } 00650 wordCount = chopLine(s, words); 00651 if ((wordCount < 21 || wordCount > 23 || (words[8][0] != '+' && words[8][0] != '-')) && (!eof)) 00652 errAbort("%s is not a psLayout file", fileName); 00653 else 00654 lineFileReuse(lf); 00655 freeMem(s); 00656 } 00657 } 00658 *retQueryType = qt; 00659 *retTargetType = tt; 00660 *retLf = lf; 00661 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void pslxFileOpenWithUniqueMeta | ( | char * | fileName, | |
| enum gfType * | retQueryType, | |||
| enum gfType * | retTargetType, | |||
| struct lineFile ** | retLf, | |||
| FILE * | f | |||
| ) |
Definition at line 670 of file psl.c.
References pslxFileOpenWithMetaConfig(), and TRUE.
Referenced by pslFileOpenWithUniqueMeta().
00673 { 00674 pslxFileOpenWithMetaConfig(fileName, TRUE, retQueryType, retTargetType, retLf, f); 00675 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct psl* pslxLoad | ( | char ** | row | ) | [read] |
Definition at line 56 of file psl.c.
References pslLoad(), psl::qSequence, sqlStringDynamicArray(), and psl::tSequence.
Referenced by pslNext().
00059 { 00060 struct psl *ret = pslLoad(row); 00061 int retSize; 00062 sqlStringDynamicArray(row[21],&ret->qSequence, &retSize); 00063 sqlStringDynamicArray(row[22],&ret->tSequence, &retSize); 00064 return ret; 00065 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 746 of file psl.c.
References psl::blockCount, lm, lmAlloc(), lmCloneString(), pslLoadLm(), psl::qSequence, sqlStringArray(), and psl::tSequence.
00748 { 00749 struct psl *ret = pslLoadLm(row, lm); 00750 ret->qSequence = lmAlloc(lm, sizeof(ret->qSequence[0]) * ret->blockCount); 00751 sqlStringArray(lmCloneString(lm,row[21]),ret->qSequence, ret->blockCount); 00752 ret->tSequence = lmAlloc(lm, sizeof(ret->tSequence[0]) * ret->blockCount); 00753 sqlStringArray(lmCloneString(lm,row[22]),ret->tSequence, ret->blockCount); 00754 return ret; 00755 }
Here is the call graph for this function:

Definition at line 502 of file psl.c.
References gfTypeName(), and pslLabelColumns().
00504 { 00505 fprintf(f, "psLayout version 4 %s %s\n", gfTypeName(qType), gfTypeName(tType)); 00506 pslLabelColumns(f); 00507 }
Here is the call graph for this function:

| static void rcSeqs | ( | char ** | seqs, | |
| unsigned | blockCount, | |||
| unsigned * | blockSizes | |||
| ) | [static] |
Definition at line 1097 of file psl.c.
References freeMem(), needLargeMem(), and reverseComplement().
Referenced by pslRc(), and swapRCBlocks().
01100 { 01101 char *buf, *next; 01102 int i, memSz = 0; 01103 01104 /* get a new memory block for strings */ 01105 for (i = 0; i < blockCount; i++) 01106 memSz += blockSizes[i]+1; 01107 next = buf = needLargeMem(memSz); 01108 01109 /* reverse compliment and copy to new memory block */ 01110 for (i = blockCount-1; i >= 0; i--) 01111 { 01112 int len = strlen(seqs[i]); 01113 reverseComplement(seqs[i], len); 01114 memcpy(next, seqs[i], len+1); 01115 next += len+1; 01116 } 01117 01118 /* swap memory and update pointers */ 01119 freeMem(seqs[0]); 01120 seqs[0] = buf; 01121 next = buf; 01122 01123 for (i = 0; i < blockCount; i++) 01124 { 01125 seqs[i] = next; 01126 next += blockSizes[i]+1; 01127 } 01128 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct hash* readPslToBinKeeper | ( | char * | sizeFileName, | |
| char * | pslFileName | |||
| ) | [read] |
Definition at line 1663 of file psl.c.
References ArraySize, binKeeperAdd(), binKeeperNew(), hashAdd(), hashLookup(), hashMustFindVal(), lineFileClose(), lineFileNeedNum(), lineFileNextRow(), lineFileOpen(), lineFileRow, name, newHash(), pslLoad(), hash::size, psl::tEnd, psl::tName, TRUE, psl::tStart, and warn().
01665 { 01666 struct binKeeper *bk; 01667 struct psl *psl; 01668 struct lineFile *sf = lineFileOpen(sizeFileName, TRUE); 01669 struct lineFile *pf = lineFileOpen(pslFileName , TRUE); 01670 struct hash *hash = newHash(0); 01671 char *chromRow[2]; 01672 char *row[21] ; 01673 01674 while (lineFileRow(sf, chromRow)) 01675 { 01676 char *name = chromRow[0]; 01677 int size = lineFileNeedNum(sf, chromRow, 1); 01678 01679 if (hashLookup(hash, name) != NULL) 01680 warn("Duplicate %s, ignoring all but first\n", name); 01681 else 01682 { 01683 bk = binKeeperNew(0, size); 01684 assert(size > 1); 01685 hashAdd(hash, name, bk); 01686 } 01687 } 01688 while (lineFileNextRow(pf, row, ArraySize(row))) 01689 { 01690 psl = pslLoad(row); 01691 bk = hashMustFindVal(hash, psl->tName); 01692 binKeeperAdd(bk, psl->tStart, psl->tEnd, psl); 01693 } 01694 lineFileClose(&pf); 01695 lineFileClose(&sf); 01696 return hash; 01697 }
Here is the call graph for this function:

| static void swapBlocks | ( | struct psl * | psl | ) | [static] |
Definition at line 1176 of file psl.c.
References psl::blockCount, psl::qSequence, psl::qStarts, swapVars, psl::tSequence, and psl::tStarts.
Referenced by pslSwap().
01178 { 01179 int i; 01180 unsigned utmp; 01181 char *stmp; 01182 for (i = 0; i < psl->blockCount; i++) 01183 { 01184 swapVars(psl->qStarts[i], psl->tStarts[i], utmp); 01185 if (psl->qSequence != NULL) 01186 swapVars(psl->qSequence[i], psl->tSequence[i], stmp); 01187 } 01188 }
Here is the caller graph for this function:

| static void swapRCBlocks | ( | struct psl * | psl | ) | [static] |
Definition at line 1190 of file psl.c.
References psl::blockCount, psl::blockSizes, psl::qSequence, psl::qSize, psl::qStarts, rcSeqs(), reverseUnsigned(), swapVars, psl::tSequence, psl::tSize, and psl::tStarts.
Referenced by pslSwap().
01193 { 01194 int i; 01195 unsigned *uatmp; 01196 char **satmp; 01197 reverseUnsigned(psl->tStarts, psl->blockCount); 01198 reverseUnsigned(psl->qStarts, psl->blockCount); 01199 reverseUnsigned(psl->blockSizes, psl->blockCount); 01200 swapVars(psl->tStarts, psl->qStarts, uatmp); 01201 01202 /* qSize and tSize have already been swapped */ 01203 for (i = 0; i < psl->blockCount; i++) 01204 { 01205 psl->qStarts[i] = psl->qSize - (psl->qStarts[i] + psl->blockSizes[i]); 01206 psl->tStarts[i] = psl->tSize - (psl->tStarts[i] + psl->blockSizes[i]); 01207 } 01208 if (psl->qSequence != NULL) 01209 { 01210 /* note: all block sequences are stored in one malloc block, which is 01211 * entry zero */ 01212 rcSeqs(psl->qSequence, psl->blockCount, psl->blockSizes); 01213 rcSeqs(psl->tSequence, psl->blockCount, psl->blockSizes); 01214 swapVars(psl->qSequence, psl->tSequence, satmp); 01215 } 01216 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void trimAlignment | ( | struct psl * | psl, | |
| char ** | qStringPtr, | |||
| char ** | tStringPtr, | |||
| int * | aliSizePtr | |||
| ) | [static] |
Definition at line 1741 of file psl.c.
References countInitialChars(), countNonInsert(), countTerminalChars(), max, psl::qEnd, psl::qStart, psl::tEnd, and psl::tStart.
Referenced by pslFromAlign().
01744 { 01745 char* qString = *qStringPtr; 01746 char* tString = *tStringPtr; 01747 int aliSize = *aliSizePtr; 01748 int qStartInsert = countInitialChars(qString, '-'); 01749 int tStartInsert = countInitialChars(tString, '-'); 01750 int qEndInsert = countTerminalChars(qString, '-'); 01751 int tEndInsert = countTerminalChars(tString, '-'); 01752 int startInsert = max(qStartInsert, tStartInsert); 01753 int endInsert = max(qEndInsert, tEndInsert); 01754 int qNonCount, tNonCount; 01755 01756 if (startInsert > 0) 01757 { 01758 qNonCount = countNonInsert(qString, startInsert); 01759 tNonCount = countNonInsert(tString, startInsert); 01760 qString += startInsert; 01761 tString += startInsert; 01762 aliSize -= startInsert; 01763 psl->qStart += qNonCount; 01764 psl->tStart += tNonCount; 01765 } 01766 if (endInsert > 0) 01767 { 01768 aliSize -= endInsert; 01769 qNonCount = countNonInsert(qString+aliSize, endInsert); 01770 tNonCount = countNonInsert(tString+aliSize, endInsert); 01771 qString[aliSize] = 0; 01772 tString[aliSize] = 0; 01773 psl->qEnd -= qNonCount; 01774 psl->tEnd -= tNonCount; 01775 } 01776 *qStringPtr = qString; 01777 *tStringPtr = tString; 01778 *aliSizePtr = aliSize; 01779 /* recursive call to handle double gap */ 01780 if (startInsert > 0 || endInsert > 0) 01781 trimAlignment(psl, qStringPtr, tStringPtr, aliSizePtr); 01782 }
Here is the call graph for this function:

Here is the caller graph for this function:

char* createString [static] |
Initial value:
"CREATE TABLE %s (\n" "%s" "matches int unsigned not null, # Number of bases that match that aren't repeats\n" "misMatches int unsigned not null, # Number of bases that don't match\n" "repMatches int unsigned not null, # Number of bases that match but are part of repeats\n" "nCount int unsigned not null, # Number of 'N' bases\n" "qNumInsert int unsigned not null, # Number of inserts in query\n" "qBaseInsert int unsigned not null, # Number of bases inserted in query\n" "tNumInsert int unsigned not null, # Number of inserts in target\n" "tBaseInsert int unsigned not null, # Number of bases inserted in target\n" "strand char(2) not null, # + or - for strand. First character is query, second is target.\n" "qName varchar(255) not null, # Query sequence name\n" "qSize int unsigned not null, # Query sequence size\n" "qStart int unsigned not null, # Alignment start position in query\n" "qEnd int unsigned not null, # Alignment end position in query\n" "tName varchar(255) not null, # Target sequence name\n" "tSize int unsigned not null, # Target sequence size\n" "tStart int unsigned not null, # Alignment start position in target\n" "tEnd int unsigned not null, # Alignment end position in target\n" "blockCount int unsigned not null, # Number of blocks in alignment\n" "blockSizes longblob not null, # Size of each block\n" "qStarts longblob not null, # Start of each block in query.\n" "tStarts longblob not null, # Start of each block in target.\n"
Definition at line 24 of file psl.c.
Referenced by pslGetCreateSql().
char* indexString [static] |
Initial value:
"#Indices\n"
"%s"
"INDEX(qName(12))\n"
")\n"
Definition at line 49 of file psl.c.
Referenced by pslGetCreateSql().
char const rcsid[] = "$Id: psl.c,v 1.76 2007/03/04 20:24:19 kent Exp $" [static] |
1.5.2