lib/psl.c File Reference

#include "common.h"
#include "sqlNum.h"
#include "sqlList.h"
#include "localmem.h"
#include "psl.h"
#include "hash.h"
#include "linefile.h"
#include "dnaseq.h"
#include "dystring.h"
#include "fuzzyFind.h"
#include "aliType.h"
#include "binRange.h"
#include "rangeTree.h"

Include dependency graph for psl.c:

Go to the source code of this file.

Defines

#define swapVars(a, b, tmp)   ((tmp) = (a), (a) = (b), (b) = (tmp))

Functions

pslpslxLoad (char **row)
pslpslLoad (char **row)
pslpslCommaIn (char **pS, struct psl *ret)
void pslFree (struct psl **pEl)
void pslFreeList (struct psl **pList)
void pslOutput (struct psl *el, FILE *f, char sep, char lastSep)
void pslOutputShort (struct psl *el, FILE *f, char sep, char lastSep)
void pslOutFormat (struct psl *el, FILE *f, char sep, char lastSep)
pslpslLoadAll (char *fileName)
int pslCmpQuery (const void *va, const void *vb)
int pslCmpTarget (const void *va, const void *vb)
int pslCmpTargetAndStrand (const void *va, const void *vb)
int pslCmpScore (const void *va, const void *vb)
int pslCmpQueryScore (const void *va, const void *vb)
int pslCmpMatch (const void *va, const void *vb)
static void pslLabelColumns (FILE *f)
void pslxWriteHead (FILE *f, enum gfType qType, enum gfType tType)
void pslWriteHead (FILE *f)
void pslWriteAll (struct psl *pslList, char *fileName, boolean writeHeader)
void pslxFileOpen (char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf)
static void pslxFileOpenWithMetaConfig (char *fileName, bool isMetaUnique, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
void pslxFileOpenWithMeta (char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
void pslxFileOpenWithUniqueMeta (char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
lineFilepslFileOpen (char *fileName)
lineFilepslFileOpenWithMeta (char *fileName, FILE *f)
lineFilepslFileOpenWithUniqueMeta (char *fileName, FILE *f)
pslpslNext (struct lineFile *lf)
pslpslxLoadLm (char **row, struct lm *lm)
pslpslLoadLm (char **row, struct lm *lm)
boolean pslIsProtein (const struct psl *psl)
int pslCalcMilliBad (struct psl *psl, boolean isMrna)
int pslScore (const struct psl *psl)
int pslCmpScoreDesc (const void *va, const void *vb)
ffAlipslToFakeFfAli (struct psl *psl, DNA *needle, DNA *haystack)
pslpslFromFakeFfAli (struct ffAli *ff, DNA *needle, DNA *haystack, char strand, char *qName, int qSize, char *tName, int tSize)
ffAlipslToFfAli (struct psl *psl, struct dnaSeq *query, struct dnaSeq *target, int targetOffset)
int pslOrientation (struct psl *psl)
int pslWeightedIntronOrientation (struct psl *psl, struct dnaSeq *genoSeq, int offset)
int pslIntronOrientation (struct psl *psl, struct dnaSeq *genoSeq, int offset)
boolean pslHasIntron (struct psl *psl, struct dnaSeq *seq, int seqOffset)
void pslTailSizes (struct psl *psl, int *retStartTail, int *retEndTail)
static void rcSeqs (char **seqs, unsigned blockCount, unsigned *blockSizes)
void pslRcBoth (struct psl *psl)
void pslRc (struct psl *psl)
static void swapBlocks (struct psl *psl)
static void swapRCBlocks (struct psl *psl)
void pslSwap (struct psl *psl, boolean noRc)
void pslTargetOffset (struct psl *psl, int offset)
void pslDump (struct psl *psl, FILE *f)
static void pslRecalcBounds (struct psl *psl)
pslpslTrimToTargetRange (struct psl *oldPsl, int tMin, int tMax)
pslpslTrimToQueryRange (struct psl *oldPsl, int qMin, int qMax)
char * pslGetCreateSql (char *table, unsigned options, int tNameIdxLen)
static void printPslDesc (char *pslDesc, FILE *out, struct psl *psl)
static void chkRanges (char *pslDesc, FILE *out, struct psl *psl, char *pName, char *pLabel, char pCLabel, char pStrand, unsigned pSize, unsigned pStart, unsigned pEnd, unsigned blockCount, unsigned *blockSizes, unsigned *pBlockStarts, int *errCountPtr)
int pslCheck (char *pslDesc, FILE *out, struct psl *psl)
hashreadPslToBinKeeper (char *sizeFileName, char *pslFileName)
static int countInitialChars (char *s, char c)
static int countTerminalChars (char *s, char c)
static int countNonInsert (char *s, int size)
static void trimAlignment (struct psl *psl, char **qStringPtr, char **tStringPtr, int *aliSizePtr)
static void addBlock (struct psl *psl, int qs, int qe, int ts, int te, int *blockSpace)
static void accumCounts (struct psl *psl, char prevQ, char prevT, char q, char t, unsigned options)
pslpslFromAlign (char *qName, int qSize, int qStart, int qEnd, char *qString, char *tName, int tSize, int tStart, int tEnd, char *tString, char *strand, unsigned options)
pslpslNew (char *qName, unsigned qSize, int qStart, int qEnd, char *tName, unsigned tSize, int tStart, int tEnd, char *strand, unsigned blockSpace, unsigned opts)
void pslGrow (struct psl *psl, int *blockSpacePtr)
int pslRangeTreeOverlap (struct psl *psl, struct rbTree *rangeTree)

Variables

static char const rcsid [] = "$Id: psl.c,v 1.76 2007/03/04 20:24:19 kent Exp $"
static char * createString
static char * indexString


Define Documentation

#define swapVars ( a,
b,
tmp   )     ((tmp) = (a), (a) = (b), (b) = (tmp))

Definition at line 1174 of file psl.c.

Referenced by pslSwap(), swapBlocks(), and swapRCBlocks().


Function Documentation

static void accumCounts ( struct psl psl,
char  prevQ,
char  prevT,
char  q,
char  t,
unsigned  options 
) [static]

Definition at line 1797 of file psl.c.

References psl::match, psl::misMatch, psl::nCount, PSL_IS_SOFTMASK, psl::qBaseInsert, psl::qNumInsert, psl::repMatch, psl::tBaseInsert, and psl::tNumInsert.

Referenced by pslFromAlign().

01800 {
01801 if ((q != '-') && (t != '-'))
01802     {
01803     /* aligned column. */
01804     char qu = toupper(q);
01805     char tu = toupper(t);
01806     if ((q == 'N') || (t == 'N'))
01807         psl->nCount++;
01808     else if (qu == tu)
01809         {
01810         if ((options & PSL_IS_SOFTMASK) && ((qu != q) || (tu != t)))
01811             psl->repMatch++;
01812         else
01813             psl->match++;
01814         }
01815     else
01816         psl->misMatch++;
01817     }
01818 else if ((q == '-') && (t != '-'))
01819     {
01820     /* target insert */
01821     psl->tBaseInsert++;
01822     if (prevQ != '-')
01823         psl->tNumInsert++;
01824     }
01825 else if ((t == '-') && (q != '-'))
01826     {
01827     /* query insert */
01828     psl->qBaseInsert++;
01829     if (prevT != '-')
01830         psl->qNumInsert++;
01831     }
01832 }

Here is the caller graph for this function:

static void addBlock ( struct psl psl,
int  qs,
int  qe,
int  ts,
int  te,
int *  blockSpace 
) [static]

Definition at line 1784 of file psl.c.

References psl::blockCount, and pslGrow().

Referenced by pslFromAlign().

01787 {
01788 assert((qe-qs) == (te-ts));  /* same lengths? */
01789 if (psl->blockCount == *blockSpace)
01790     pslGrow(psl, blockSpace);
01791 psl->blockSizes[psl->blockCount] = qe - qs;
01792 psl->qStarts[psl->blockCount] = qs;
01793 psl->tStarts[psl->blockCount] = ts;
01794 psl->blockCount++;
01795 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void chkRanges ( char *  pslDesc,
FILE *  out,
struct psl psl,
char *  pName,
char *  pLabel,
char  pCLabel,
char  pStrand,
unsigned  pSize,
unsigned  pStart,
unsigned  pEnd,
unsigned  blockCount,
unsigned *  blockSizes,
unsigned *  pBlockStarts,
int *  errCountPtr 
) [static]

Definition at line 1522 of file psl.c.

References printPslDesc().

Referenced by pslCheck().

01528 {
01529 int errCount = *errCountPtr;
01530 unsigned iBlk, prevBlkEnd = 0;
01531 
01532 if (pStart >= pEnd)
01533     {
01534     if (errCount == 0)
01535         printPslDesc(pslDesc, out, psl);
01536     fprintf(out, "\t%s %cStart %u >= %cEnd %u\n",
01537             pName, pCLabel, pStart, pCLabel, pEnd);
01538     errCount++;
01539     }
01540 if (pEnd > pSize)
01541     {
01542     if (errCount == 0)
01543         printPslDesc(pslDesc, out, psl);
01544     fprintf(out, "\t%s %cEnd %u >= %cSize %u\n",
01545             pName, pCLabel, pEnd, pCLabel, pSize);
01546     errCount++;
01547     }
01548 for (iBlk = 0; iBlk < blockCount; iBlk++)
01549     {
01550     unsigned blkStart = pBlockStarts[iBlk];
01551     unsigned blkEnd = blkStart+blockSizes[iBlk];
01552     /* translate stand to genomic coords */
01553     unsigned gBlkStart = (pStrand == '+') ? blkStart : (pSize - blkEnd);
01554     unsigned gBlkEnd = (pStrand == '+') ? blkEnd : (pSize - blkStart);
01555 
01556     if ((pSize > 0) && (blkEnd > pSize))
01557         {
01558         if (errCount == 0)
01559             printPslDesc(pslDesc, out, psl);
01560         fprintf(out, "\t%s %s block %u end %u > %cSize %u\n",
01561                 pName, pLabel, iBlk, blkEnd, pCLabel, pSize);
01562         errCount++;
01563         }
01564     if (gBlkStart < pStart)
01565         {
01566         if (errCount == 0)
01567             printPslDesc(pslDesc, out, psl);
01568         fprintf(out, "\t%s %s block %u start %u < %cStart %u\n",
01569                 pName, pLabel, iBlk, gBlkStart, pCLabel, pStart);
01570         errCount++;
01571         }
01572     if (gBlkStart >= pEnd)
01573         {
01574         if (errCount == 0)
01575             printPslDesc(pslDesc, out, psl);
01576         fprintf(out, "\t%s %s block %u start %u >= %cEnd %u\n",
01577                 pName, pLabel, iBlk, gBlkStart, pCLabel, pEnd);
01578         errCount++;
01579         }
01580     if (gBlkEnd < pStart)
01581         {
01582         if (errCount == 0)
01583             printPslDesc(pslDesc, out, psl);
01584         fprintf(out, "\t%s %s block %u end %u < %cStart %u\n",
01585                 pName, pLabel, iBlk, gBlkEnd, pCLabel, pStart);
01586         errCount++;
01587         }
01588     if (gBlkEnd > pEnd)
01589         {
01590         if (errCount == 0)
01591             printPslDesc(pslDesc, out, psl);
01592         fprintf(out, "\t%s %s block %u end %u > %cEnd %u\n",
01593                 pName, pLabel, iBlk, gBlkEnd, pCLabel, pEnd);
01594         errCount++;
01595         }
01596     if ((iBlk > 0) && (blkStart < prevBlkEnd))
01597         {
01598         if (errCount == 0)
01599             printPslDesc(pslDesc, out, psl);
01600         fprintf(out, "\t%s %s block %u start %u < previous block end %u\n",
01601                 pName, pLabel, iBlk, blkStart, prevBlkEnd);
01602         errCount++;
01603         }
01604     prevBlkEnd = blkEnd;
01605     }
01606 *errCountPtr = errCount;
01607 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int countInitialChars ( char *  s,
char  c 
) [static]

Definition at line 1699 of file psl.c.

Referenced by trimAlignment().

01701 {
01702 int count = 0;
01703 char d;
01704 while ((d = *s++) != 0)
01705     {
01706     if (c == d)
01707         ++count;
01708     else
01709         break;
01710     }
01711 return count;
01712 }

Here is the caller graph for this function:

static int countNonInsert ( char *  s,
int  size 
) [static]

Definition at line 1729 of file psl.c.

Referenced by trimAlignment().

01732 {
01733 int count = 0;
01734 int i;
01735 for (i=0; i<size; ++i)
01736     if (*s++ != '-')
01737         ++count;
01738 return count;
01739 }

Here is the caller graph for this function:

static int countTerminalChars ( char *  s,
char  c 
) [static]

Definition at line 1714 of file psl.c.

Referenced by trimAlignment().

01716 {
01717 int len = strlen(s), i;
01718 int count = 0;
01719 for (i=len-1; i>=0; --i)
01720     {
01721     if (c == s[i])
01722         ++count;
01723     else
01724         break;
01725     }
01726 return count;
01727 }

Here is the caller graph for this function:

static void printPslDesc ( char *  pslDesc,
FILE *  out,
struct psl psl 
) [static]

Definition at line 1513 of file psl.c.

References psl::qEnd, psl::qName, psl::qStart, psl::strand, psl::tEnd, psl::tName, and psl::tStart.

Referenced by chkRanges(), and pslCheck().

01515 {
01516 fprintf(out, "Error: invalid PSL: %s:%u-%u %s:%u-%u %s %s\n",
01517         psl->qName, psl->qStart, psl->qEnd,
01518         psl->tName, psl->tStart, psl->tEnd,
01519         psl->strand, pslDesc);
01520 }

Here is the caller graph for this function:

int pslCalcMilliBad ( struct psl psl,
boolean  isMrna 
)

Definition at line 801 of file psl.c.

References psl::match, min, psl::misMatch, pslIsProtein(), psl::qEnd, psl::qNumInsert, psl::qStart, psl::repMatch, round, psl::tEnd, psl::tNumInsert, and psl::tStart.

Referenced by aliLines().

00803 {
00804 int sizeMul = pslIsProtein(psl) ? 3 : 1;
00805 int qAliSize, tAliSize, aliSize;
00806 int milliBad = 0;
00807 int sizeDif;
00808 int insertFactor;
00809 int total;
00810 
00811 qAliSize = sizeMul * (psl->qEnd - psl->qStart);
00812 tAliSize = psl->tEnd - psl->tStart;
00813 aliSize = min(qAliSize, tAliSize);
00814 if (aliSize <= 0)
00815     return 0;
00816 sizeDif = qAliSize - tAliSize;
00817 if (sizeDif < 0)
00818     {
00819     if (isMrna)
00820         sizeDif = 0;
00821     else
00822         sizeDif = -sizeDif;
00823     }
00824 insertFactor = psl->qNumInsert;
00825 if (!isMrna)
00826     insertFactor += psl->tNumInsert;
00827 
00828 total = (sizeMul * (psl->match + psl->repMatch + psl->misMatch));
00829 if (total != 0)
00830     milliBad = (1000 * (psl->misMatch*sizeMul + insertFactor + round(3*log(1+sizeDif)))) / total;
00831 return milliBad;
00832 }

Here is the call graph for this function:

Here is the caller graph for this function:

int pslCheck ( char *  pslDesc,
FILE *  out,
struct psl psl 
)

Definition at line 1609 of file psl.c.

References psl::blockCount, psl::blockSizes, chkRanges(), FALSE, printPslDesc(), pslIsProtein(), psl::qEnd, psl::qName, psl::qSize, psl::qStart, psl::qStarts, psl::strand, psl::tEnd, psl::tName, TRUE, psl::tSize, psl::tStart, and psl::tStarts.

01612 {
01613 static char* VALID_STRANDS[] = {
01614     "+", "-", "++", "+-", "-+", "--", NULL
01615 };
01616 int i, errCount = 0;
01617 char strand;
01618 boolean isProt = FALSE;
01619 
01620 /* check strand value */
01621 for (i = 0; VALID_STRANDS[i] != NULL; i++)
01622     {
01623     if (strcmp(psl->strand, VALID_STRANDS[i]) == 0)
01624         break;
01625     }
01626 if (VALID_STRANDS[i] == NULL)
01627     {
01628     if (errCount == 0)
01629         printPslDesc(pslDesc, out, psl);
01630     fprintf(out, "\tinvalid PSL strand: \"%s\"\n", psl->strand);
01631     errCount++;
01632     }
01633 
01634 /* check target */
01635 if (pslIsProtein(psl))
01636     {
01637     isProt = TRUE;
01638     for (i = 0; i < psl->blockCount ; i++)
01639         psl->blockSizes[i] *= 3;
01640     }
01641 
01642 strand = ((psl->strand[1] == '\0') ? '+' : psl->strand[1]);
01643 chkRanges(pslDesc, out, psl, psl->tName, "target", 't',
01644           strand, psl->tSize, psl->tStart, psl->tEnd,
01645           psl->blockCount, psl->blockSizes, psl->tStarts,
01646           &errCount);
01647 if (isProt)
01648     {
01649     for (i = 0; i < psl->blockCount ; i++)
01650         psl->blockSizes[i] /= 3;
01651     }
01652 
01653 /* check query */
01654 strand = psl->strand[0];
01655 chkRanges(pslDesc, out, psl, psl->qName, "query", 'q',
01656           strand, psl->qSize, psl->qStart, psl->qEnd,
01657           psl->blockCount, psl->blockSizes, psl->qStarts,
01658           &errCount);
01659 
01660 return errCount;
01661 }

Here is the call graph for this function:

int pslCmpMatch ( const void *  va,
const void *  vb 
)

Definition at line 484 of file psl.c.

References psl::match.

00486 {
00487 const struct psl *a = *((struct psl **)va);
00488 const struct psl *b = *((struct psl **)vb);
00489 return b->match - a->match;
00490 }

int pslCmpQuery ( const void *  va,
const void *  vb 
)

Definition at line 426 of file psl.c.

References psl::qName, and psl::qStart.

Referenced by aliLines().

00428 {
00429 const struct psl *a = *((struct psl **)va);
00430 const struct psl *b = *((struct psl **)vb);
00431 int dif;
00432 dif = strcmp(a->qName, b->qName);
00433 if (dif == 0)
00434     dif = a->qStart - b->qStart;
00435 return dif;
00436 }

Here is the caller graph for this function:

int pslCmpQueryScore ( const void *  va,
const void *  vb 
)

Definition at line 473 of file psl.c.

References pslScore(), and psl::qName.

Referenced by aliLines().

00475 {
00476 const struct psl *a = *((struct psl **)va);
00477 const struct psl *b = *((struct psl **)vb);
00478 int diff = strcmp(a->qName, b->qName);
00479 if (diff == 0)
00480     diff = pslScore(b) - pslScore(a);
00481 return diff;
00482 }

Here is the call graph for this function:

Here is the caller graph for this function:

int pslCmpScore ( const void *  va,
const void *  vb 
)

Definition at line 465 of file psl.c.

References pslScore().

Referenced by aliLines().

00467 {
00468 const struct psl *a = *((struct psl **)va);
00469 const struct psl *b = *((struct psl **)vb);
00470 return pslScore(b) - pslScore(a);
00471 }

Here is the call graph for this function:

Here is the caller graph for this function:

int pslCmpScoreDesc ( const void *  va,
const void *  vb 
)

Definition at line 843 of file psl.c.

References pslScore().

00845 {
00846 const struct psl *a = *((struct psl **)va);
00847 const struct psl *b = *((struct psl **)vb);
00848 return pslScore(b) - pslScore(a);
00849 }

Here is the call graph for this function:

int pslCmpTarget ( const void *  va,
const void *  vb 
)

Definition at line 438 of file psl.c.

References psl::tName, and psl::tStart.

00440 {
00441 const struct psl *a = *((struct psl **)va);
00442 const struct psl *b = *((struct psl **)vb);
00443 int dif;
00444 dif = strcmp(a->tName, b->tName);
00445 if (dif == 0)
00446     dif = a->tStart - b->tStart;
00447 return dif;
00448 }

int pslCmpTargetAndStrand ( const void *  va,
const void *  vb 
)

Definition at line 450 of file psl.c.

References psl::strand, psl::tName, and psl::tStart.

00452 {
00453 const struct psl *a = *((struct psl **)va);
00454 const struct psl *b = *((struct psl **)vb);
00455 int dif;
00456 dif = strcmp(a->tName, b->tName);
00457 if (dif == 0)
00458     dif = strcmp(a->strand, b->strand);
00459 if (dif == 0)
00460     dif = a->tStart - b->tStart;
00461 return dif;
00462 }

struct psl* pslCommaIn ( char **  pS,
struct psl ret 
) [read]

Definition at line 114 of file psl.c.

References AllocArray, AllocVar, sqlEatChar(), sqlFixedStringComma(), sqlSignedComma(), sqlStringComma(), and sqlUnsignedComma().

00118 {
00119 char *s = *pS;
00120 int i;
00121 
00122 if (ret == NULL)
00123     AllocVar(ret);
00124 ret->match = sqlUnsignedComma(&s);
00125 ret->misMatch = sqlUnsignedComma(&s);
00126 ret->repMatch = sqlUnsignedComma(&s);
00127 ret->nCount = sqlUnsignedComma(&s);
00128 ret->qNumInsert = sqlUnsignedComma(&s);
00129 ret->qBaseInsert = sqlSignedComma(&s);
00130 ret->tNumInsert = sqlUnsignedComma(&s);
00131 ret->tBaseInsert = sqlSignedComma(&s);
00132 sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand));
00133 ret->qName = sqlStringComma(&s);
00134 ret->qSize = sqlUnsignedComma(&s);
00135 ret->qStart = sqlUnsignedComma(&s);
00136 ret->qEnd = sqlUnsignedComma(&s);
00137 ret->tName = sqlStringComma(&s);
00138 ret->tSize = sqlUnsignedComma(&s);
00139 ret->tStart = sqlUnsignedComma(&s);
00140 ret->tEnd = sqlUnsignedComma(&s);
00141 ret->blockCount = sqlUnsignedComma(&s);
00142 s = sqlEatChar(s, '{');
00143 AllocArray(ret->blockSizes, ret->blockCount);
00144 for (i=0; i<ret->blockCount; ++i)
00145     {
00146     ret->blockSizes[i] = sqlUnsignedComma(&s);
00147     }
00148 s = sqlEatChar(s, '}');
00149 s = sqlEatChar(s, ',');
00150 s = sqlEatChar(s, '{');
00151 AllocArray(ret->qStarts, ret->blockCount);
00152 for (i=0; i<ret->blockCount; ++i)
00153     {
00154     ret->qStarts[i] = sqlUnsignedComma(&s);
00155     }
00156 s = sqlEatChar(s, '}');
00157 s = sqlEatChar(s, ',');
00158 s = sqlEatChar(s, '{');
00159 AllocArray(ret->tStarts, ret->blockCount);
00160 for (i=0; i<ret->blockCount; ++i)
00161     {
00162     ret->tStarts[i] = sqlUnsignedComma(&s);
00163     }
00164 s = sqlEatChar(s, '}');
00165 s = sqlEatChar(s, ',');
00166 *pS = s;
00167 return ret;
00168 }

Here is the call graph for this function:

void pslDump ( struct psl psl,
FILE *  f 
)

Definition at line 1267 of file psl.c.

References psl::blockCount, psl::blockSizes, psl::qEnd, psl::qName, psl::qStart, psl::qStarts, psl::strand, psl::tEnd, psl::tName, psl::tStart, and psl::tStarts.

01269 {
01270 int i;
01271 fprintf(f, "<PRE>\n");
01272 fprintf(f, "psl %s:%d-%d %s %s:%d-%d %d\n", 
01273         psl->qName, psl->qStart, psl->qEnd, psl->strand,
01274         psl->tName, psl->tStart, psl->tEnd, psl->blockCount);
01275 for (i=0; i<psl->blockCount; ++i)
01276     fprintf(f, "  size %d, qStart %d, tStart %d\n", 
01277         psl->blockSizes[i], psl->qStarts[i], psl->tStarts[i]);
01278 fprintf(f, "</PRE>");
01279 }

struct lineFile* pslFileOpen ( char *  fileName  )  [read]

Definition at line 677 of file psl.c.

References pslxFileOpen().

Referenced by aliLines(), doDetailLine(), and pslLoadAll().

00680 {
00681 enum gfType qt, tt;
00682 struct lineFile *lf;
00683 pslxFileOpen(fileName, &qt, &tt, &lf);
00684 return lf;
00685 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct lineFile* pslFileOpenWithMeta ( char *  fileName,
FILE *  f 
) [read]

Definition at line 687 of file psl.c.

References pslxFileOpenWithMeta().

00690 {
00691 enum gfType qt, tt;
00692 struct lineFile *lf;
00693 pslxFileOpenWithMeta(fileName, &qt, &tt, &lf, f);
00694 return lf;
00695 }

Here is the call graph for this function:

struct lineFile* pslFileOpenWithUniqueMeta ( char *  fileName,
FILE *  f 
) [read]

Definition at line 697 of file psl.c.

References pslxFileOpenWithUniqueMeta().

00701 {
00702 enum gfType qt, tt;
00703 struct lineFile *lf;
00704 pslxFileOpenWithUniqueMeta(fileName, &qt, &tt, &lf, f);
00705 return lf;
00706 }

Here is the call graph for this function:

void pslFree ( struct psl **  pEl  ) 

Definition at line 170 of file psl.c.

References psl::blockSizes, freeMem(), freez(), psl::qName, psl::qSequence, psl::qStarts, psl::tName, psl::tSequence, and psl::tStarts.

Referenced by pslFreeList(), pslFromAlign(), and pslTransMap().

00173 {
00174 struct psl *el;
00175 
00176 if ((el = *pEl) == NULL) return;
00177 freeMem(el->qName);
00178 freeMem(el->tName);
00179 freeMem(el->blockSizes);
00180 freeMem(el->qStarts);
00181 freeMem(el->tStarts);
00182 if (el->qSequence)
00183     {
00184     freeMem(el->qSequence[0]);
00185     freeMem(el->qSequence);
00186     }
00187 if (el->tSequence)
00188     {
00189     freeMem(el->tSequence[0]);
00190     freeMem(el->tSequence);
00191     }
00192 freez(pEl);
00193 }

Here is the call graph for this function:

Here is the caller graph for this function:

void pslFreeList ( struct psl **  pList  ) 

Definition at line 195 of file psl.c.

References psl::next, and pslFree().

Referenced by aliLines().

00197 {
00198 struct psl *el, *next;
00199 
00200 for (el = *pList; el != NULL; el = next)
00201     {
00202     next = el->next;
00203     pslFree(&el);
00204     }
00205 *pList = NULL;
00206 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct psl* pslFromAlign ( char *  qName,
int  qSize,
int  qStart,
int  qEnd,
char *  qString,
char *  tName,
int  tSize,
int  tStart,
int  tEnd,
char *  tString,
char *  strand,
unsigned  options 
) [read]

Definition at line 1834 of file psl.c.

References accumCounts(), addBlock(), AllocVar, errAbort(), FALSE, pslFree(), pslNew(), psl::qEnd, psl::qSize, psl::qStart, reverseIntRange(), psl::tEnd, trimAlignment(), TRUE, psl::tSize, and psl::tStart.

01840 {
01841 /* Note, the unit tests for these programs exercise this function:
01842  *   hg/embossToPsl
01843  *   hg/mouseStuff/axtToPsl
01844  *   hg/spideyToPsl
01845  *   utils/est2genomeToPsl
01846  */
01847 
01848 int blockSpace = 16;
01849 struct psl* psl = NULL;
01850 int aliSize = strlen(qString);
01851 boolean eitherInsert = FALSE;   /* True if either in insert state. */
01852 int i, qs,qe,ts,te;
01853 char prevQ = '\0',  prevT = '\0';
01854 AllocVar(psl);
01855 
01856 if (strlen(tString) != aliSize)
01857     errAbort("query and target alignment strings are different lengths");
01858 
01859 psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd,
01860              strand, blockSpace, 0);
01861 trimAlignment(psl, &qString, &tString, &aliSize);
01862 
01863 /* Don't create if either query or target is zero length after trim */
01864  if ((psl->qStart == psl->qEnd) || (psl->tStart == psl->tEnd))
01865      {
01866      pslFree(&psl);
01867      return NULL;
01868      }
01869 
01870 /* Get range of alignment in strand-specified coordinates */
01871 qs = psl->qStart;
01872 qe = psl->qEnd;
01873 if (strand[0] == '-')
01874     reverseIntRange(&qs, &qe, psl->qSize);
01875 ts = psl->tStart;
01876 te = psl->tEnd;
01877 if (strand[1] == '-')
01878     reverseIntRange(&ts, &te, psl->tSize);
01879 
01880 eitherInsert = FALSE;
01881 qe = qs;  /* current block coords */
01882 te = ts;
01883 for (i=0; i<aliSize; ++i)
01884     {
01885     char q = qString[i];
01886     char t = tString[i];
01887     if ((q == '-') && (t == '-'))
01888         {
01889         continue; /* nothing in this column, just ignore it */
01890         }
01891     else if ((q == '-') || (t == '-'))
01892         {
01893         /* insert in one of the columns */
01894         if (!eitherInsert)
01895             {
01896             /* end of a block */
01897             addBlock(psl, qs, qe, ts, te, &blockSpace);
01898             eitherInsert = TRUE;
01899             }
01900         if (q != '-')
01901             qe += 1;
01902         if (t != '-')
01903             te += 1;
01904         }
01905     else
01906         {
01907         /* columns aligned */
01908         if (eitherInsert)
01909             {
01910             /* start new block */
01911             qs = qe;
01912             ts = te;
01913             eitherInsert = FALSE;
01914             }
01915         qe += 1;
01916         te += 1;
01917         }
01918     accumCounts(psl, prevQ, prevT, q, t, options);
01919     prevQ = q; /* will not include skipped empty columns */
01920     prevT = t;
01921     }
01922 addBlock(psl, qs, qe, ts, te, &blockSpace);
01923 return psl;
01924 }

Here is the call graph for this function:

struct psl* pslFromFakeFfAli ( struct ffAli ff,
DNA needle,
DNA haystack,
char  strand,
char *  qName,
int  qSize,
char *  tName,
int  tSize 
) [read]

Definition at line 880 of file psl.c.

References AllocArray, AllocVar, psl::blockCount, psl::blockSizes, cloneString(), ffAliCount(), ffAli::hEnd, ffAli::hStart, ffAli::nEnd, ffAli::nStart, psl::qEnd, psl::qName, psl::qSize, psl::qStart, psl::qStarts, reverseIntRange(), ffAli::right, psl::strand, psl::tEnd, psl::tName, psl::tSize, psl::tStart, and psl::tStarts.

00886 {
00887 struct psl *psl;
00888 unsigned *blockSizes;
00889 unsigned *qStarts;
00890 unsigned *tStarts;
00891 int blockCount;
00892 int i;
00893 int nStart, hStart;
00894 int nEnd, hEnd;
00895 
00896 AllocVar(psl);
00897 psl->blockCount = blockCount = ffAliCount(ff);
00898 psl->blockSizes = AllocArray(blockSizes, blockCount);
00899 psl->qStarts = AllocArray(qStarts, blockCount);
00900 psl->tStarts = AllocArray(tStarts, blockCount);
00901 psl->qName = cloneString(qName);
00902 psl->qSize = qSize;
00903 psl->tName = cloneString(tName);
00904 psl->tSize = tSize;
00905 psl->strand[0] = strand;
00906 
00907 for (i=0; i<blockCount; ++i)
00908     {
00909     nStart = ff->nStart - needle;
00910     nEnd = ff->nEnd - needle;
00911     hStart = ff->hStart - haystack;
00912     hEnd = ff->hEnd - haystack;
00913     blockSizes[i] = nEnd - nStart;
00914     qStarts[i] = nStart;
00915     tStarts[i] = hStart;
00916     if (i == 0)
00917        {
00918        psl->qStart = nStart;
00919        psl->tStart = hStart;
00920        }
00921     if (i == blockCount-1)
00922        {
00923        psl->qEnd = nEnd;
00924        psl->tEnd = hEnd;
00925        }
00926     ff = ff->right;
00927     }
00928 if (strand == '-')
00929     {
00930     reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
00931     }
00932 return psl;
00933 }

Here is the call graph for this function:

char* pslGetCreateSql ( char *  table,
unsigned  options,
int  tNameIdxLen 
)

Definition at line 1473 of file psl.c.

References cloneString(), createString, dyStringFree, dyStringPrintf(), errAbort(), indexString, newDyString(), PSL_TNAMEIX, PSL_WITH_BIN, PSL_XA_FORMAT, safef(), and dyString::string.

01479 {
01480 struct dyString *sqlCmd = newDyString(2048);
01481 char *sqlCmdStr;
01482 char binIx[32];
01483 
01484 binIx[0] = '\0';
01485 
01486 /* check and default tNameIdxLen */
01487 if ((tNameIdxLen > 0) && !(options & PSL_TNAMEIX))
01488     errAbort("pslGetCreateSql: must specify PSL_TNAMEIX with tNameIdxLen > 0");
01489 if ((options & PSL_TNAMEIX) && (tNameIdxLen == 0))
01490     tNameIdxLen = 8;
01491 
01492 /* setup tName and bin index fields */
01493 if (options & PSL_WITH_BIN)
01494     {
01495     if (options & PSL_TNAMEIX)
01496         safef(binIx, sizeof(binIx), "INDEX(tName(%d),bin),\n", tNameIdxLen);
01497     else
01498         safef(binIx, sizeof(binIx), "INDEX(bin),\n");
01499     }
01500 dyStringPrintf(sqlCmd, createString, table, 
01501     ((options & PSL_WITH_BIN) ? "bin smallint unsigned not null,\n" : ""));
01502 if (options & PSL_XA_FORMAT)
01503     {
01504     dyStringPrintf(sqlCmd, "qSeq longblob not null,\n");
01505     dyStringPrintf(sqlCmd, "tSeq longblob not null,\n");
01506     }
01507 dyStringPrintf(sqlCmd, indexString, binIx);
01508 sqlCmdStr = cloneString(sqlCmd->string);
01509 dyStringFree(&sqlCmd);
01510 return sqlCmdStr;
01511 }

Here is the call graph for this function:

void pslGrow ( struct psl psl,
int *  blockSpacePtr 
)

Definition at line 1956 of file psl.c.

References psl::blockSizes, ExpandArray, psl::qSequence, psl::qStarts, psl::tSequence, and psl::tStarts.

Referenced by addBlock(), and addPslBlock().

01960 {
01961 int blockSpace = *blockSpacePtr;
01962 int newSpace = 2 * blockSpace;
01963 ExpandArray(psl->blockSizes, blockSpace, newSpace);
01964 ExpandArray(psl->qStarts, blockSpace, newSpace);
01965 ExpandArray(psl->tStarts, blockSpace, newSpace);
01966 if (psl->qSequence != NULL)
01967     {
01968     ExpandArray(psl->qSequence, blockSpace, newSpace);
01969     ExpandArray(psl->tSequence, blockSpace, newSpace);
01970     }
01971 *blockSpacePtr = newSpace;
01972 }

Here is the caller graph for this function:

boolean pslHasIntron ( struct psl psl,
struct dnaSeq seq,
int  seqOffset 
)

Definition at line 1043 of file psl.c.

References psl::blockCount, blockSize, psl::blockSizes, dnaSeq::dna, FALSE, intronOrientation(), psl::qStarts, reverseIntRange(), psl::strand, TRUE, psl::tSize, and psl::tStarts.

01046 {
01047 int blockCount = psl->blockCount, i;
01048 unsigned *tStarts = psl->tStarts;
01049 unsigned *blockSizes = psl->blockSizes;
01050 unsigned *qStarts = psl->qStarts;
01051 int blockSize, start, end;
01052 DNA *dna = seq->dna;
01053 
01054 for (i=1; i<blockCount; ++i)
01055     {
01056     blockSize = blockSizes[i-1];
01057     start = qStarts[i-1]+blockSize;
01058     end = qStarts[i];
01059     if (start == end)
01060         {
01061         start = tStarts[i-1] + blockSize;
01062         end = tStarts[i];
01063         if (psl->strand[1] == '-')
01064             reverseIntRange(&start, &end, psl->tSize);
01065         start -= seqOffset;
01066         end -= seqOffset;
01067         if (intronOrientation(dna+start, dna+end) != 0)
01068             return TRUE;
01069         }
01070     }
01071 return FALSE;
01072 }

Here is the call graph for this function:

int pslIntronOrientation ( struct psl psl,
struct dnaSeq genoSeq,
int  offset 
)

Definition at line 1029 of file psl.c.

References pslWeightedIntronOrientation().

01034 {
01035 int intronDir = pslWeightedIntronOrientation(psl, genoSeq, offset);
01036 if (intronDir < 0)
01037     intronDir = -1;
01038 else if (intronDir > 0)
01039     intronDir = 1;
01040 return intronDir;
01041 }

Here is the call graph for this function:

boolean pslIsProtein ( const struct psl psl  ) 

Definition at line 790 of file psl.c.

References psl::blockCount, psl::blockSizes, psl::strand, psl::tEnd, psl::tSize, psl::tStart, and psl::tStarts.

Referenced by pslCalcMilliBad(), pslCheck(), pslScore(), and pslTransMap().

00792 {
00793 int lastBlock = psl->blockCount - 1;
00794 
00795 return  (((psl->strand[1] == '+' ) &&
00796     (psl->tEnd == psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])) ||
00797    ((psl->strand[1] == '-') && 
00798     (psl->tStart == (psl->tSize-(psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])))));
00799 }

Here is the caller graph for this function:

static void pslLabelColumns ( FILE *  f  )  [static]

Definition at line 492 of file psl.c.

Referenced by pslWriteHead(), and pslxWriteHead().

00494 {
00495 fputs("\n"
00496 "match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ        \tQ   \tQ    \tQ  \tT        \tT   \tT    \tT  \tblock\tblockSizes \tqStarts\t tStarts\n"
00497 "     \tmatch\tmatch\t   \tcount\tbases\tcount\tbases\t      \tname     \tsize\tstart\tend\tname     \tsize\tstart\tend\tcount\n" 
00498 "---------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
00499 f);
00500 }

Here is the caller graph for this function:

struct psl* pslLoad ( char **  row  )  [read]

Definition at line 67 of file psl.c.

References AllocVar, psl::blockCount, psl::blockSizes, cloneString(), psl::match, psl::misMatch, psl::nCount, psl::qBaseInsert, psl::qEnd, psl::qName, psl::qNumInsert, psl::qSize, psl::qStart, psl::qStarts, psl::repMatch, sqlSigned(), sqlUnsigned(), sqlUnsignedDynamicArray(), psl::strand, psl::tBaseInsert, psl::tEnd, psl::tName, psl::tNumInsert, psl::tSize, psl::tStart, and psl::tStarts.

Referenced by pslNext(), pslxLoad(), and readPslToBinKeeper().

00070 {
00071 struct psl *ret;
00072 int sizeOne;
00073 
00074 AllocVar(ret);
00075 ret->blockCount = sqlUnsigned(row[17]);
00076 ret->match = sqlUnsigned(row[0]);
00077 ret->misMatch = sqlUnsigned(row[1]);
00078 ret->repMatch = sqlUnsigned(row[2]);
00079 ret->nCount = sqlUnsigned(row[3]);
00080 ret->qNumInsert = sqlUnsigned(row[4]);
00081 ret->qBaseInsert = sqlSigned(row[5]);
00082 ret->tNumInsert = sqlUnsigned(row[6]);
00083 ret->tBaseInsert = sqlSigned(row[7]);
00084 strcpy(ret->strand, row[8]);
00085 ret->qName = cloneString(row[9]);
00086 ret->qSize = sqlUnsigned(row[10]);
00087 ret->qStart = sqlUnsigned(row[11]);
00088 ret->qEnd = sqlUnsigned(row[12]);
00089 ret->tName = cloneString(row[13]);
00090 ret->tSize = sqlUnsigned(row[14]);
00091 ret->tStart = sqlUnsigned(row[15]);
00092 ret->tEnd = sqlUnsigned(row[16]);
00093 sqlUnsignedDynamicArray(row[18], &ret->blockSizes, &sizeOne);
00094 if (sizeOne != ret->blockCount)
00095     {
00096     printf("sizeOne bloxksizes %d bs %d block=%s\n",sizeOne, ret->blockCount,row[18]);
00097     }
00098 assert(sizeOne == ret->blockCount);
00099 sqlUnsignedDynamicArray(row[19], &ret->qStarts, &sizeOne);
00100 if (sizeOne != ret->blockCount)
00101     {
00102     printf("sizeOne qStarts %d bs %d\n",sizeOne, ret->blockCount);
00103     }
00104 assert(sizeOne == ret->blockCount);
00105 sqlUnsignedDynamicArray(row[20], &ret->tStarts, &sizeOne);
00106 if (sizeOne != ret->blockCount)
00107     {
00108     printf("sizeOne tStarts %d bs %d\n",sizeOne, ret->blockCount);
00109     }
00110 assert(sizeOne == ret->blockCount);
00111 return ret;
00112 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct psl* pslLoadAll ( char *  fileName  )  [read]

Definition at line 411 of file psl.c.

References lineFileClose(), pslFileOpen(), pslNext(), slAddHead, and slReverse().

00413 {
00414 struct lineFile *lf = pslFileOpen(fileName);
00415 struct psl *pslList = NULL, *psl;
00416 while ((psl = pslNext(lf)) != NULL)
00417     {
00418     slAddHead(&pslList, psl);
00419     }
00420 slReverse(&pslList);
00421 lineFileClose(&lf);
00422 return pslList;
00423 }

Here is the call graph for this function:

struct psl* pslLoadLm ( char **  row,
struct lm lm 
) [read]

Definition at line 757 of file psl.c.

References lm, lmAlloc(), lmCloneString(), sqlSigned(), sqlUnsigned(), and sqlUnsignedArray().

Referenced by loadPsl(), and pslxLoadLm().

00759 {
00760 struct psl *ret;
00761 
00762 ret = lmAlloc(lm, sizeof(*ret));
00763 ret->blockCount = sqlUnsigned(row[17]);
00764 ret->match = sqlUnsigned(row[0]);
00765 ret->misMatch = sqlUnsigned(row[1]);
00766 ret->repMatch = sqlUnsigned(row[2]);
00767 ret->nCount = sqlUnsigned(row[3]);
00768 ret->qNumInsert = sqlUnsigned(row[4]);
00769 ret->qBaseInsert = sqlSigned(row[5]);
00770 ret->tNumInsert = sqlUnsigned(row[6]);
00771 ret->tBaseInsert = sqlSigned(row[7]);
00772 strcpy(ret->strand, row[8]);
00773 ret->qName = lmCloneString(lm,row[9]);
00774 ret->qSize = sqlUnsigned(row[10]);
00775 ret->qStart = sqlUnsigned(row[11]);
00776 ret->qEnd = sqlUnsigned(row[12]);
00777 ret->tName = lmCloneString(lm, row[13]);
00778 ret->tSize = sqlUnsigned(row[14]);
00779 ret->tStart = sqlUnsigned(row[15]);
00780 ret->tEnd = sqlUnsigned(row[16]);
00781 ret->blockSizes = lmAlloc(lm, sizeof(ret->blockSizes[0]) * ret->blockCount);
00782 sqlUnsignedArray(row[18], ret->blockSizes, ret->blockCount);
00783 ret->qStarts = lmAlloc(lm, sizeof(ret->qStarts[0]) * ret->blockCount);
00784 sqlUnsignedArray(row[19], ret->qStarts, ret->blockCount);
00785 ret->tStarts = lmAlloc(lm, sizeof(ret->tStarts[0]) * ret->blockCount);
00786 sqlUnsignedArray(row[20], ret->tStarts, ret->blockCount);
00787 return ret;
00788 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct psl* pslNew ( char *  qName,
unsigned  qSize,
int  qStart,
int  qEnd,
char *  tName,
unsigned  tSize,
int  tStart,
int  tEnd,
char *  strand,
unsigned  blockSpace,
unsigned  opts 
) [read]

Definition at line 1926 of file psl.c.

References AllocArray, AllocVar, psl::blockSizes, cloneString(), PSL_XA_FORMAT, psl::qEnd, psl::qName, psl::qSequence, psl::qSize, psl::qStart, psl::qStarts, psl::strand, psl::tEnd, psl::tName, psl::tSequence, psl::tSize, psl::tStart, and psl::tStarts.

Referenced by createMappedPsl(), and pslFromAlign().

01932 {
01933 struct psl *psl;
01934 AllocVar(psl);
01935 assert(blockSpace > 0);
01936 psl->qName = cloneString(qName);
01937 psl->qSize = qSize;
01938 psl->qStart = qStart;
01939 psl->qEnd = qEnd;
01940 psl->tName = cloneString(tName);
01941 psl->tSize = tSize;
01942 psl->tStart = tStart;
01943 psl->tEnd = tEnd;
01944 strncpy(psl->strand, strand, 2);
01945 AllocArray(psl->blockSizes, blockSpace);
01946 AllocArray(psl->qStarts, blockSpace);
01947 AllocArray(psl->tStarts, blockSpace);
01948 if (opts & PSL_XA_FORMAT)
01949     {
01950     AllocArray(psl->qSequence, blockSpace);
01951     AllocArray(psl->tSequence, blockSpace);
01952     }
01953 return psl;
01954 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct psl* pslNext ( struct lineFile lf  )  [read]

Definition at line 708 of file psl.c.

References chopLine, errAbort(), lineFile::fileName, lineFileNextReal(), lineFile::lineIx, needMoreMem(), pslLoad(), and pslxLoad().

Referenced by aliLines(), doDetailLine(), and pslLoadAll().

00711 {
00712 char *line;
00713 int lineSize;
00714 char *words[32];
00715 int wordCount;
00716 static int lineAlloc = 0;
00717 static char *chopBuf = NULL;
00718 
00719 if (!lineFileNextReal(lf, &line))
00720     {
00721     return NULL;
00722     }
00723 lineSize = strlen(line);
00724 if (lineSize >= lineAlloc)
00725     {
00726     lineAlloc = lineSize+256;
00727     chopBuf = needMoreMem(chopBuf, 0, lineAlloc);
00728     }
00729 memcpy(chopBuf, line, lineSize+1);
00730 wordCount = chopLine(chopBuf, words);
00731 if (wordCount == 21)
00732     {
00733     return pslLoad(words);
00734     }
00735 if (wordCount == 23)
00736     {
00737     return pslxLoad(words);
00738     }
00739 else
00740     {
00741     errAbort("Bad line %d of %s wordCount is %d instead of 21 or 23\n", lf->lineIx, lf->fileName, wordCount);
00742     return NULL;
00743     }
00744 }

Here is the call graph for this function:

Here is the caller graph for this function:

int pslOrientation ( struct psl psl  ) 

Definition at line 979 of file psl.c.

References psl::strand.

Referenced by pslTailSizes().

00981 {
00982 if (psl->strand[1] != '\0')
00983     {
00984     /* translated blat */
00985     if (psl->strand[0] != psl->strand[1])
00986         return -1;
00987     else
00988         return 1;
00989     }
00990 else
00991     {
00992     if (psl->strand[0] == '-')
00993         return -1;
00994     else
00995         return 1;
00996     }
00997 }

Here is the caller graph for this function:

void pslOutFormat ( struct psl el,
FILE *  f,
char  sep,
char  lastSep 
)

Definition at line 355 of file psl.c.

References errAbort(), psl::match, psl::misMatch, psl::nCount, psl::qEnd, psl::qName, psl::qSize, psl::qStart, psl::repMatch, startsWith(), psl::strand, psl::tEnd, psl::tName, and psl::tStart.

00360 {
00361 const char *headers[] = {"Matches", "Mismatches", "Matches in repeats", "Number of N bases", "Query name", "Size", "Start", "End", "Chromosome", "Strand", "Start", "End"};
00362 char *hformat = "<B>%s:</B> "; /* string for formatted print for headers */
00363 char *uformat = "<B>%s:</B> %u%c"; /* string for formatted print for unsigned variable */
00364 char *targName;
00365 
00366 fprintf(f, uformat, headers[0], el->match, sep);
00367 fprintf(f, uformat, headers[1], el->misMatch, sep);
00368 fprintf(f, uformat, headers[2], el->repMatch, sep);
00369 fprintf(f, uformat, headers[3], el->nCount, sep);
00370 
00371 fprintf(f, hformat, headers[4]);
00372 if (sep == ',') fputc('"',f);
00373 fprintf(f, "%s", el->qName);
00374 if (sep == ',') fputc('"',f);
00375 fputc(sep,f);
00376 
00377 fprintf(f, uformat, headers[5], el->qSize, sep);
00378 fprintf(f, uformat, headers[6], el->qStart, sep);
00379 fprintf(f, uformat, headers[7], el->qEnd, sep);
00380 
00381 fprintf(f, hformat, headers[8]);
00382 if (sep == ',') fputc('"',f);
00383 /* skip leading 'chr' in string to get only chromosome part */
00384 targName = el->tName;
00385 if (startsWith("chr", el->tName))
00386    targName += 3;
00387 fprintf(f, "%s", targName);
00388 
00389 if (sep == ',') fputc('"',f);
00390 fputc(sep,f);
00391 
00392 fprintf(f, hformat, headers[9]);
00393 if (sep == ',') fputc('"',f);
00394 fprintf(f, "%s", el->strand);
00395 if (sep == ',') fputc('"',f);
00396 fputc(sep,f);
00397 
00398 fprintf(f, uformat, headers[10], el->tStart, sep);
00399 fprintf(f, uformat, headers[11], el->tEnd, sep);
00400 
00401 fputc(lastSep,f);
00402 
00403 if (ferror(f))
00404     {
00405     perror("Error writing psl file\n");
00406     errAbort("\n");
00407     }
00408 
00409 }

Here is the call graph for this function:

void pslOutput ( struct psl el,
FILE *  f,
char  sep,
char  lastSep 
)

Definition at line 208 of file psl.c.

References psl::blockCount, psl::blockSizes, psl::match, psl::misMatch, psl::nCount, psl::qBaseInsert, psl::qEnd, psl::qName, psl::qNumInsert, psl::qSequence, psl::qSize, psl::qStart, psl::qStarts, psl::repMatch, psl::strand, psl::tBaseInsert, psl::tEnd, psl::tName, psl::tNumInsert, psl::tSequence, psl::tSize, psl::tStart, and psl::tStarts.

00210 {
00211 int i;
00212 fprintf(f, "%u", el->match);
00213 fputc(sep,f);
00214 fprintf(f, "%u", el->misMatch);
00215 fputc(sep,f);
00216 fprintf(f, "%u", el->repMatch);
00217 fputc(sep,f);
00218 fprintf(f, "%u", el->nCount);
00219 fputc(sep,f);
00220 fprintf(f, "%u", el->qNumInsert);
00221 fputc(sep,f);
00222 fprintf(f, "%d", el->qBaseInsert);
00223 fputc(sep,f);
00224 fprintf(f, "%u", el->tNumInsert);
00225 fputc(sep,f);
00226 fprintf(f, "%d", el->tBaseInsert);
00227 fputc(sep,f);
00228 if (sep == ',') fputc('"',f);
00229 fprintf(f, "%s", el->strand);
00230 if (sep == ',') fputc('"',f);
00231 fputc(sep,f);
00232 if (sep == ',') fputc('"',f);
00233 fprintf(f, "%s", el->qName);
00234 if (sep == ',') fputc('"',f);
00235 fputc(sep,f);
00236 fprintf(f, "%u", el->qSize);
00237 fputc(sep,f);
00238 fprintf(f, "%u", el->qStart);
00239 fputc(sep,f);
00240 fprintf(f, "%u", el->qEnd);
00241 fputc(sep,f);
00242 if (sep == ',') fputc('"',f);
00243 fprintf(f, "%s", el->tName);
00244 if (sep == ',') fputc('"',f);
00245 fputc(sep,f);
00246 fprintf(f, "%u", el->tSize);
00247 fputc(sep,f);
00248 fprintf(f, "%u", el->tStart);
00249 fputc(sep,f);
00250 fprintf(f, "%u", el->tEnd);
00251 fputc(sep,f);
00252 fprintf(f, "%u", el->blockCount);
00253 fputc(sep,f);
00254 if (sep == ',') fputc('{',f);
00255 for (i=0; i<el->blockCount; ++i)
00256     {
00257     fprintf(f, "%u", el->blockSizes[i]);
00258     fputc(',', f);
00259     }
00260 if (sep == ',') fputc('}',f);
00261 fputc(sep,f);
00262 if (sep == ',') fputc('{',f);
00263 for (i=0; i<el->blockCount; ++i)
00264     {
00265     fprintf(f, "%u", el->qStarts[i]);
00266     fputc(',', f);
00267     }
00268 if (sep == ',') fputc('}',f);
00269 fputc(sep,f);
00270 if (sep == ',') fputc('{',f);
00271 for (i=0; i<el->blockCount; ++i)
00272     {
00273     fprintf(f, "%u", el->tStarts[i]);
00274     fputc(',', f);
00275     }
00276 if (sep == ',') fputc('}',f);
00277 if (el->qSequence)
00278     {
00279     fputc(sep,f);
00280     if (sep == ',') fputc('{',f);
00281     for (i=0; i<el->blockCount; ++i)
00282         {
00283         fprintf(f, "%s", el->qSequence[i]);
00284         fputc(',', f);
00285         }
00286     if (sep == ',') fputc('}',f);
00287     fputc(sep,f);
00288     if (sep == ',') fputc('{',f);
00289     for (i=0; i<el->blockCount; ++i)
00290         {
00291         fprintf(f, "%s", el->tSequence[i]);
00292         fputc(',', f);
00293         }
00294     if (sep == ',') fputc('}',f);
00295     }
00296 
00297 fputc(lastSep,f);
00298 if (ferror(f))
00299     {
00300     perror("Error writing psl file\n");
00301     errAbort("\n");
00302     }
00303 }

void pslOutputShort ( struct psl el,
FILE *  f,
char  sep,
char  lastSep 
)

Definition at line 307 of file psl.c.

References psl::blockCount, errAbort(), psl::match, psl::misMatch, psl::qBaseInsert, psl::qEnd, psl::qName, psl::qNumInsert, psl::qStart, psl::repMatch, psl::strand, psl::tBaseInsert, psl::tEnd, psl::tName, psl::tNumInsert, and psl::tStart.

00309 {
00310 fprintf(f, "%u", el->match);
00311 fputc(sep,f);
00312 fprintf(f, "%u", el->misMatch);
00313 fputc(sep,f);
00314 fprintf(f, "%u", el->repMatch);
00315 fputc(sep,f);
00316 fprintf(f, "%u", el->qNumInsert);
00317 fputc(sep,f);
00318 fprintf(f, "%d", el->qBaseInsert);
00319 fputc(sep,f);
00320 fprintf(f, "%u", el->tNumInsert);
00321 fputc(sep,f);
00322 fprintf(f, "%d", el->tBaseInsert);
00323 fputc(sep,f);
00324 if (sep == ',') fputc('"',f);
00325 fprintf(f, "%s", el->strand);
00326 if (sep == ',') fputc('"',f);
00327 fputc(sep,f);
00328 if (sep == ',') fputc('"',f);
00329 fprintf(f, "%s", el->qName);
00330 if (sep == ',') fputc('"',f);
00331 fputc(sep,f);
00332 fprintf(f, "%u", el->qStart);
00333 fputc(sep,f);
00334 fprintf(f, "%u", abs(el->qEnd - el->qStart));
00335 fputc(sep,f);
00336 if (sep == ',') fputc('"',f);
00337 fprintf(f, "%s", el->tName);
00338 if (sep == ',') fputc('"',f);
00339 fputc(sep,f);
00340 fprintf(f, "%u", el->tStart);
00341 fputc(sep,f);
00342 fprintf(f, "%u", abs(el->tEnd - el->tStart));
00343 fputc(sep,f);
00344 fprintf(f, "%u", el->blockCount);
00345 fputc(sep,f);
00346 if (sep == ',') fputc('{',f);
00347 fputc(lastSep,f);
00348 if (ferror(f))
00349     {
00350     perror("Error writing psl file\n");
00351     errAbort("\n");
00352     }
00353 }

Here is the call graph for this function:

int pslRangeTreeOverlap ( struct psl psl,
struct rbTree rangeTree 
)

Definition at line 1974 of file psl.c.

References psl::blockCount, psl::blockSizes, rangeTreeOverlapSize(), reverseIntRange(), psl::strand, psl::tSize, and psl::tStarts.

01976 {
01977 int i;
01978 int overlap = 0;
01979 boolean isRc = (psl->strand[1] == '-');
01980 for (i=0; i<psl->blockCount; ++i)
01981     {
01982     int start = psl->tStarts[i];
01983     int end = start + psl->blockSizes[i];
01984     if (isRc)
01985         reverseIntRange(&start, &end, psl->tSize);
01986     overlap += rangeTreeOverlapSize(rangeTree, start, end);
01987     }
01988 return overlap;
01989 }

Here is the call graph for this function:

void pslRc ( struct psl psl  ) 

Definition at line 1149 of file psl.c.

References psl::blockCount, psl::blockSizes, psl::qSequence, psl::qSize, psl::qStarts, rcSeqs(), reverseUnsigned(), psl::strand, psl::tSequence, psl::tSize, and psl::tStarts.

Referenced by adjustOrientation(), and pslTransMap().

01151 {
01152 int i;
01153 
01154 /* swap strand, forcing target to have an explict strand */
01155 psl->strand[0] = (psl->strand[0] != '-') ? '-' : '+';
01156 psl->strand[1] = (psl->strand[1] != '-') ? '-' : '+';
01157 
01158 for (i = 0; i < psl->blockCount; i++)
01159     {
01160     psl->qStarts[i] = psl->qSize - (psl->qStarts[i] + psl->blockSizes[i]);
01161     psl->tStarts[i] = psl->tSize - (psl->tStarts[i] + psl->blockSizes[i]);
01162     }
01163 reverseUnsigned(psl->tStarts, psl->blockCount);
01164 reverseUnsigned(psl->qStarts, psl->blockCount);
01165 reverseUnsigned(psl->blockSizes, psl->blockCount);
01166 if (psl->qSequence != NULL)
01167     {
01168     rcSeqs(psl->qSequence, psl->blockCount, psl->blockSizes);
01169     rcSeqs(psl->tSequence, psl->blockCount, psl->blockSizes);
01170     }
01171 }

Here is the call graph for this function:

Here is the caller graph for this function:

void pslRcBoth ( struct psl psl  ) 

Definition at line 1130 of file psl.c.

References psl::blockCount, psl::blockSizes, psl::qSize, psl::qStarts, reverseIntRange(), reverseUnsigned(), psl::tEnd, psl::tSize, psl::tStart, and psl::tStarts.

Referenced by pslGenoShowAlignment(), and pslShowAlignment().

01133 {
01134 int tSize = psl->tSize, qSize = psl->qSize;
01135 int blockCount = psl->blockCount, i;
01136 unsigned *tStarts = psl->tStarts, *qStarts = psl->qStarts, *blockSizes = psl->blockSizes;
01137 
01138 reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize);
01139 for (i=0; i<blockCount; ++i)
01140     {
01141     tStarts[i] = (int)tSize - ((int)tStarts[i] + (int)blockSizes[i]);
01142     qStarts[i] = (int)qSize - ((int)qStarts[i] + (int)blockSizes[i]);
01143     }
01144 reverseUnsigned(tStarts, blockCount);
01145 reverseUnsigned(qStarts, blockCount);
01146 reverseUnsigned(blockSizes, blockCount);
01147 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void pslRecalcBounds ( struct psl psl  )  [static]

Definition at line 1281 of file psl.c.

References psl::blockCount, psl::blockSizes, psl::qEnd, psl::qSize, psl::qStart, psl::qStarts, reverseIntRange(), psl::strand, psl::tEnd, psl::tSize, psl::tStart, and psl::tStarts.

Referenced by pslTrimToQueryRange(), and pslTrimToTargetRange().

01284 {
01285 int qStart, qEnd, tStart, tEnd, size;
01286 int last = psl->blockCount - 1;
01287 qStart = psl->qStarts[0];
01288 tStart = psl->tStarts[0];
01289 size = psl->blockSizes[last];
01290 qEnd = psl->qStarts[last] + size;
01291 tEnd = psl->tStarts[last] + size;
01292 if (psl->strand[0] == '-')
01293     reverseIntRange(&qStart, &qEnd, psl->qSize);
01294 if (psl->strand[1] == '-')
01295     reverseIntRange(&tStart, &tEnd, psl->tSize);
01296 psl->qStart = qStart;
01297 psl->qEnd = qEnd;
01298 psl->tStart = tStart;
01299 psl->tEnd = tEnd;
01300 }

Here is the call graph for this function:

Here is the caller graph for this function:

int pslScore ( const struct psl psl  ) 

Definition at line 834 of file psl.c.

References psl::match, psl::misMatch, pslIsProtein(), psl::qNumInsert, psl::repMatch, and psl::tNumInsert.

Referenced by aliLines(), pslCmpQueryScore(), pslCmpScore(), pslCmpScoreDesc(), and pslCmpTargetScore().

00836 {
00837 int sizeMul = pslIsProtein(psl) ? 3 : 1;
00838 
00839 return sizeMul * (psl->match + ( psl->repMatch>>1)) - 
00840         sizeMul * psl->misMatch - psl->qNumInsert - psl->tNumInsert;
00841 }

Here is the call graph for this function:

Here is the caller graph for this function:

void pslSwap ( struct psl psl,
boolean  noRc 
)

Definition at line 1218 of file psl.c.

References psl::qBaseInsert, psl::qEnd, psl::qName, psl::qNumInsert, psl::qSize, psl::qStart, psl::strand, swapBlocks(), swapRCBlocks(), swapVars, psl::tBaseInsert, psl::tEnd, psl::tName, psl::tNumInsert, psl::tSize, and psl::tStart.

01221 {
01222 int itmp;
01223 unsigned utmp;
01224 char ctmp, *stmp; 
01225 swapVars(psl->qBaseInsert, psl->tBaseInsert, utmp);
01226 swapVars(psl->tNumInsert, psl->qNumInsert, utmp);
01227 swapVars(psl->qName, psl->tName, stmp);
01228 swapVars(psl->qSize, psl->tSize, utmp);
01229 swapVars(psl->qStart, psl->tStart, itmp);
01230 swapVars(psl->qEnd, psl->tEnd, itmp);
01231 
01232 /* handle strand and block copy */
01233 if (psl->strand[1] != '\0')
01234     {
01235     /* translated */
01236     swapVars(psl->strand[0], psl->strand[1], ctmp);
01237     swapBlocks(psl);
01238     }
01239 else if (noRc)
01240     {
01241     /* untranslated with no reverse complement */
01242     psl->strand[1] = psl->strand[0];
01243     psl->strand[0] = '+';
01244     swapBlocks(psl);
01245     }
01246 else
01247     {
01248     /* untranslated */
01249     if (psl->strand[0] == '+')
01250         swapBlocks(psl);
01251     else
01252         swapRCBlocks(psl);
01253     }
01254 }

Here is the call graph for this function:

void pslTailSizes ( struct psl psl,
int *  retStartTail,
int *  retEndTail 
)

Definition at line 1074 of file psl.c.

References min, pslOrientation(), psl::qEnd, psl::qSize, psl::qStart, psl::tEnd, psl::tSize, and psl::tStart.

01076 {
01077 int orientation = pslOrientation(psl);
01078 int qFloppyStart, qFloppyEnd;
01079 int tFloppyStart, tFloppyEnd;
01080 
01081 if (orientation > 0)
01082     {
01083     qFloppyStart = psl->qStart;
01084     qFloppyEnd = psl->qSize - psl->qEnd;
01085     }
01086 else
01087     {
01088     qFloppyStart = psl->qSize - psl->qEnd;
01089     qFloppyEnd = psl->qStart;
01090     }
01091 tFloppyStart = psl->tStart;
01092 tFloppyEnd = psl->tSize - psl->tEnd;
01093 *retStartTail = min(qFloppyStart, tFloppyStart);
01094 *retEndTail = min(qFloppyEnd, tFloppyEnd);
01095 }

Here is the call graph for this function:

void pslTargetOffset ( struct psl psl,
int  offset 
)

Definition at line 1256 of file psl.c.

References psl::blockCount, psl::tEnd, psl::tStart, and psl::tStarts.

01258 {
01259 int i, blockCount = psl->blockCount;
01260 unsigned *tStarts = psl->tStarts;
01261 psl->tStart += offset;
01262 psl->tEnd += offset;
01263 for (i=0; i<blockCount; ++i)
01264    tStarts[i] += offset;
01265 }

struct ffAli* pslToFakeFfAli ( struct psl psl,
DNA needle,
DNA haystack 
) [read]

Definition at line 852 of file psl.c.

References AllocVar, psl::blockCount, psl::blockSizes, ffMakeRightLinks(), ffAli::nStart, psl::qStarts, and psl::tStarts.

00856 {
00857 struct ffAli *ffList = NULL, *ff;
00858 int blockCount = psl->blockCount;
00859 unsigned *blockSizes = psl->blockSizes;
00860 unsigned *qStarts = psl->qStarts;
00861 unsigned *tStarts = psl->tStarts;
00862 int size;
00863 int i;
00864 
00865 for (i=0; i<blockCount; ++i)
00866     {
00867     size = blockSizes[i];
00868     AllocVar(ff);
00869     ff->left = ffList;
00870     ffList = ff;
00871     ff->nStart = ff->nEnd = needle + qStarts[i];
00872     ff->nEnd += size;
00873     ff->hStart = ff->hEnd = haystack + tStarts[i];
00874     ff->hEnd += size;
00875     }
00876 ffList = ffMakeRightLinks(ffList);
00877 return ffList;
00878 }

Here is the call graph for this function:

struct ffAli* pslToFfAli ( struct psl psl,
struct dnaSeq query,
struct dnaSeq target,
int  targetOffset 
) [read]

Definition at line 935 of file psl.c.

References AllocVar, psl::blockCount, psl::blockSizes, dnaSeq::dna, ffCountGoodEnds(), ffMakeRightLinks(), ffAli::nStart, psl::qStarts, dnaSeq::size, and psl::tStarts.

00939 {
00940 struct ffAli *ffList = NULL, *ff;
00941 DNA *needle = query->dna;
00942 DNA *haystack = target->dna;
00943 int blockCount = psl->blockCount;
00944 unsigned *blockSizes = psl->blockSizes;
00945 unsigned *qStarts = psl->qStarts;
00946 unsigned *tStarts = psl->tStarts;
00947 int size;
00948 int i;
00949 int tMin = targetOffset;
00950 int tMax = targetOffset + target->size;
00951 int tStart, tEnd;
00952 int clipStart, clipEnd, clipOffset, clipSize;
00953 
00954 for (i=0; i<blockCount; ++i)
00955     {
00956     clipStart = tStart = tStarts[i];
00957     size = blockSizes[i];
00958     clipEnd = tEnd = tStart + size;
00959     if (tStart < tMax && tEnd > tMin)
00960         {
00961         if (clipStart < tMin) clipStart = tMin;
00962         if (clipEnd > tMax) clipEnd = tMax;
00963         clipOffset = clipStart - tStart;
00964         clipSize = clipEnd - clipStart;
00965         AllocVar(ff);
00966         ff->left = ffList;
00967         ffList = ff;
00968         ff->nStart = ff->nEnd = needle + qStarts[i] + clipOffset;
00969         ff->nEnd += clipSize;
00970         ff->hStart = ff->hEnd = haystack + clipStart - targetOffset;
00971         ff->hEnd += clipSize;
00972         }
00973     }
00974 ffList = ffMakeRightLinks(ffList);
00975 ffCountGoodEnds(ffList);
00976 return ffList;
00977 }

Here is the call graph for this function:

struct psl* pslTrimToQueryRange ( struct psl oldPsl,
int  qMin,
int  qMax 
) [read]

Definition at line 1388 of file psl.c.

References AllocArray, AllocVar, psl::blockCount, psl::blockSizes, cloneString(), pslRecalcBounds(), psl::qEnd, psl::qName, psl::qSize, psl::qStart, psl::qStarts, rangeIntersection(), reverseIntRange(), psl::strand, psl::tName, psl::tSize, and psl::tStarts.

01391 {
01392 int newSize;
01393 int oldBlockCount = oldPsl->blockCount;
01394 boolean qIsRc = (oldPsl->strand[0] == '-');
01395 int newBlockCount = 0, completeBlockCount = 0;
01396 int i;
01397 struct psl *newPsl = NULL;
01398 int qMn = qMin, qMx = qMax;   /* qMin/qMax adjusted for strand. */
01399 
01400 /* Deal with case where we're completely trimmed out quickly. */
01401 newSize = rangeIntersection(oldPsl->qStart, oldPsl->qEnd, qMin, qMax);
01402 if (newSize <= 0)
01403     return NULL;
01404 
01405 if (qIsRc)
01406     reverseIntRange(&qMn, &qMx, oldPsl->qSize);
01407 
01408 /* Count how many blocks will survive trimming. */
01409 oldBlockCount = oldPsl->blockCount;
01410 for (i=0; i<oldBlockCount; ++i)
01411     {
01412     int s = oldPsl->qStarts[i];
01413     int e = s + oldPsl->blockSizes[i];
01414     int sz = e - s;
01415     int overlap;
01416     if ((overlap = rangeIntersection(s, e, qMn, qMx)) > 0)
01417         ++newBlockCount;
01418     if (overlap == sz)
01419         ++completeBlockCount;
01420     }
01421 
01422 if (newBlockCount == 0)
01423     return NULL;
01424 
01425 /* Allocate new psl and fill in what we already know. */
01426 AllocVar(newPsl);
01427 strcpy(newPsl->strand, oldPsl->strand);
01428 newPsl->qName = cloneString(oldPsl->qName);
01429 newPsl->qSize = oldPsl->qSize;
01430 newPsl->tName = cloneString(oldPsl->tName);
01431 newPsl->tSize = oldPsl->tSize;
01432 newPsl->blockCount = newBlockCount;
01433 AllocArray(newPsl->blockSizes, newBlockCount);
01434 AllocArray(newPsl->qStarts, newBlockCount);
01435 AllocArray(newPsl->tStarts, newBlockCount);
01436 
01437 /* Fill in blockSizes, qStarts, tStarts with real data. */
01438 newBlockCount = completeBlockCount = 0;
01439 for (i=0; i<oldBlockCount; ++i)
01440     {
01441     int oldSz = oldPsl->blockSizes[i];
01442     int sz = oldSz;
01443     int qS = oldPsl->qStarts[i];
01444     int qE = qS + sz;
01445     int tS = oldPsl->tStarts[i];
01446     int tE = tS + sz;
01447     if (rangeIntersection(qS, qE, qMn, qMx) > 0)
01448         {
01449         int diff;
01450         if ((diff = (qMn - qS)) > 0)
01451             {
01452             tS += diff;
01453             qS += diff;
01454             sz -= diff;
01455             }
01456         if ((diff = (qE - qMx)) > 0)
01457             {
01458             tE -= diff;
01459             qE -= diff;
01460             sz -= diff;
01461             }
01462         newPsl->qStarts[newBlockCount] = qS;
01463         newPsl->tStarts[newBlockCount] = tS;
01464         newPsl->blockSizes[newBlockCount] = sz;
01465         ++newBlockCount;
01466         if (sz == oldSz)
01467             ++completeBlockCount;
01468         }
01469     }
01470 pslRecalcBounds(newPsl);
01471 return newPsl;
01472 }

Here is the call graph for this function:

struct psl* pslTrimToTargetRange ( struct psl oldPsl,
int  tMin,
int  tMax 
) [read]

Definition at line 1302 of file psl.c.

References AllocArray, AllocVar, psl::blockCount, psl::blockSizes, cloneString(), pslRecalcBounds(), psl::qName, psl::qSize, psl::qStarts, rangeIntersection(), reverseIntRange(), psl::strand, psl::tEnd, psl::tName, psl::tSize, psl::tStart, and psl::tStarts.

01305 {
01306 int newSize;
01307 int oldBlockCount = oldPsl->blockCount;
01308 boolean tIsRc = (oldPsl->strand[1] == '-');
01309 int newBlockCount = 0, completeBlockCount = 0;
01310 int i;
01311 struct psl *newPsl = NULL;
01312 int tMn = tMin, tMx = tMax;   /* tMin/tMax adjusted for strand. */
01313 
01314 /* Deal with case where we're completely trimmed out quickly. */
01315 newSize = rangeIntersection(oldPsl->tStart, oldPsl->tEnd, tMin, tMax);
01316 if (newSize <= 0)
01317     return NULL;
01318 
01319 if (tIsRc)
01320     reverseIntRange(&tMn, &tMx, oldPsl->tSize);
01321 
01322 /* Count how many blocks will survive trimming. */
01323 oldBlockCount = oldPsl->blockCount;
01324 for (i=0; i<oldBlockCount; ++i)
01325     {
01326     int s = oldPsl->tStarts[i];
01327     int e = s + oldPsl->blockSizes[i];
01328     int sz = e - s;
01329     int overlap;
01330     if ((overlap = rangeIntersection(s, e, tMn, tMx)) > 0)
01331         ++newBlockCount;
01332     if (overlap == sz)
01333         ++completeBlockCount;
01334     }
01335 
01336 if (newBlockCount == 0)
01337     return NULL;
01338 
01339 /* Allocate new psl and fill in what we already know. */
01340 AllocVar(newPsl);
01341 strcpy(newPsl->strand, oldPsl->strand);
01342 newPsl->qName = cloneString(oldPsl->qName);
01343 newPsl->qSize = oldPsl->qSize;
01344 newPsl->tName = cloneString(oldPsl->tName);
01345 newPsl->tSize = oldPsl->tSize;
01346 newPsl->blockCount = newBlockCount;
01347 AllocArray(newPsl->blockSizes, newBlockCount);
01348 AllocArray(newPsl->qStarts, newBlockCount);
01349 AllocArray(newPsl->tStarts, newBlockCount);
01350 
01351 /* Fill in blockSizes, qStarts, tStarts with real data. */
01352 newBlockCount = completeBlockCount = 0;
01353 for (i=0; i<oldBlockCount; ++i)
01354     {
01355     int oldSz = oldPsl->blockSizes[i];
01356     int sz = oldSz;
01357     int tS = oldPsl->tStarts[i];
01358     int tE = tS + sz;
01359     int qS = oldPsl->qStarts[i];
01360     int qE = qS + sz;
01361     if (rangeIntersection(tS, tE, tMn, tMx) > 0)
01362         {
01363         int diff;
01364         if ((diff = (tMn - tS)) > 0)
01365             {
01366             tS += diff;
01367             qS += diff;
01368             sz -= diff;
01369             }
01370         if ((diff = (tE - tMx)) > 0)
01371             {
01372             tE -= diff;
01373             qE -= diff;
01374             sz -= diff;
01375             }
01376         newPsl->qStarts[newBlockCount] = qS;
01377         newPsl->tStarts[newBlockCount] = tS;
01378         newPsl->blockSizes[newBlockCount] = sz;
01379         ++newBlockCount;
01380         if (sz == oldSz)
01381             ++completeBlockCount;
01382         }
01383     }
01384 pslRecalcBounds(newPsl);
01385 return newPsl;
01386 }

Here is the call graph for this function:

int pslWeightedIntronOrientation ( struct psl psl,
struct dnaSeq genoSeq,
int  offset 
)

Definition at line 999 of file psl.c.

References psl::blockCount, blockSize, psl::blockSizes, dnaSeq::dna, errAbort(), intronOrientation(), psl::qStarts, psl::strand, and psl::tStarts.

Referenced by pslIntronOrientation().

01005 {
01006 int intronDir = 0;
01007 int oneDir;
01008 int i;
01009 DNA *dna = genoSeq->dna;
01010 
01011 /* code below doesn't support negative target strand (translated blat) */
01012 if (psl->strand[1] == '-')
01013     errAbort("pslWeightedIntronOrientation doesn't support a negative target strand");
01014 
01015 for (i=1; i<psl->blockCount; ++i)
01016     {
01017     int iStart, iEnd, blockSize = psl->blockSizes[i-1];
01018     if (psl->qStarts[i-1] + blockSize == psl->qStarts[i])
01019         {
01020         iStart = psl->tStarts[i-1] + psl->blockSizes[i-1] - offset;
01021         iEnd = psl->tStarts[i] - offset;
01022         oneDir = intronOrientation(dna+iStart, dna+iEnd);
01023         intronDir += oneDir;
01024         }
01025     }
01026 return intronDir;
01027 }

Here is the call graph for this function:

Here is the caller graph for this function:

void pslWriteAll ( struct psl pslList,
char *  fileName,
boolean  writeHeader 
)

Definition at line 516 of file psl.c.

References mustOpen(), psl::next, pslTabOut, and pslWriteHead().

00518 {
00519 FILE *f;
00520 struct psl *psl;
00521 
00522 f = mustOpen(fileName, "w");
00523 if (writeHeader)
00524     pslWriteHead(f);
00525 for (psl = pslList; psl != NULL; psl = psl->next)
00526     pslTabOut(psl, f);
00527 fclose(f);
00528 }

Here is the call graph for this function:

void pslWriteHead ( FILE *  f  ) 

Definition at line 509 of file psl.c.

References pslLabelColumns().

Referenced by aliLines(), pslHead(), and pslWriteAll().

00511 {
00512 fputs("psLayout version 3\n", f);
00513 pslLabelColumns(f);
00514 }

Here is the call graph for this function:

Here is the caller graph for this function:

void pslxFileOpen ( char *  fileName,
enum gfType retQueryType,
enum gfType retTargetType,
struct lineFile **  retLf 
)

Definition at line 530 of file psl.c.

References chopLine, cloneString(), errAbort(), freeMem(), gftDna, gftRna, gfTypeFromName(), lineFileNext(), lineFileOpen(), lineFileReuse(), sameString, startsWith(), TRUE, and warn().

Referenced by pslFileOpen().

00533 {
00534 char *line;
00535 int lineSize;
00536 char *words[30];
00537 char *version;
00538 int wordCount;
00539 int i;
00540 enum gfType qt = gftRna,  tt = gftDna;
00541 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00542 
00543 if (!lineFileNext(lf, &line, &lineSize))
00544     warn("%s is empty", fileName);
00545 else
00546     {
00547     if (startsWith("psLayout version", line))
00548         {
00549         wordCount = chopLine(line, words);
00550         if (wordCount < 3)
00551             errAbort("%s is not a psLayout file", fileName);
00552         version = words[2];
00553         if (sameString(version, "3"))
00554             {
00555             }
00556         else if (sameString(version, "4"))
00557             {
00558             qt = gfTypeFromName(words[3]);
00559             tt = gfTypeFromName(words[4]);
00560             }
00561         else
00562             {
00563             errAbort("%s is version %s of psLayout, this program can only handle through version 4",
00564                 fileName,  version);
00565             }
00566         for (i=0; i<4; ++i)
00567             {
00568             if (!lineFileNext(lf, &line, &lineSize))
00569                 errAbort("%s severely truncated", fileName);
00570             }
00571         }
00572     else
00573         {
00574         char *s = cloneString(line);
00575         while (line != NULL && line[0] == '#')
00576             {
00577             freeMem(s);
00578             lineFileNext(lf, &line, &lineSize);
00579             s = cloneString(line);
00580             }
00581         wordCount = chopLine(s, words);
00582         if (wordCount < 21 || wordCount > 23 || (words[8][0] != '+' && words[8][0] != '-'))
00583             errAbort("%s is not a psLayout file", fileName);
00584         else
00585             lineFileReuse(lf); 
00586         freeMem(s);
00587         }
00588     }
00589 *retQueryType = qt;
00590 *retTargetType = tt;
00591 *retLf = lf;
00592 }

Here is the call graph for this function:

Here is the caller graph for this function:

void pslxFileOpenWithMeta ( char *  fileName,
enum gfType retQueryType,
enum gfType retTargetType,
struct lineFile **  retLf,
FILE *  f 
)

Definition at line 663 of file psl.c.

References FALSE, and pslxFileOpenWithMetaConfig().

Referenced by pslFileOpenWithMeta().

00666 {
00667 pslxFileOpenWithMetaConfig(fileName, FALSE, retQueryType, retTargetType, retLf, f);
00668 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void pslxFileOpenWithMetaConfig ( char *  fileName,
bool  isMetaUnique,
enum gfType retQueryType,
enum gfType retTargetType,
struct lineFile **  retLf,
FILE *  f 
) [static]

Definition at line 594 of file psl.c.

References chopLine, cloneString(), errAbort(), FALSE, freeMem(), gftDna, gftRna, gfTypeFromName(), lineFileNext(), lineFileOpen(), lineFileReuse(), lineFileSetMetaDataOutput(), lineFileSetUniqueMetaData(), sameString, startsWith(), TRUE, and warn().

Referenced by pslxFileOpenWithMeta(), and pslxFileOpenWithUniqueMeta().

00597 {
00598 char *line;
00599 int lineSize;
00600 char *words[30];
00601 char *version;
00602 int wordCount;
00603 int i;
00604 enum gfType qt = gftRna,  tt = gftDna;
00605 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00606 
00607 lineFileSetMetaDataOutput(lf, f);
00608 if (isMetaUnique)
00609     lineFileSetUniqueMetaData(lf);
00610 if (!lineFileNext(lf, &line, &lineSize))
00611     warn("%s is empty", fileName);
00612 else
00613     {
00614     if (startsWith("psLayout version", line))
00615         {
00616         wordCount = chopLine(line, words);
00617         if (wordCount < 3)
00618             errAbort("%s is not a psLayout file", fileName);
00619         version = words[2];
00620         if (sameString(version, "3"))
00621             {
00622             }
00623         else if (sameString(version, "4"))
00624             {
00625             qt = gfTypeFromName(words[3]);
00626             tt = gfTypeFromName(words[4]);
00627             }
00628         else
00629             {
00630             errAbort("%s is version %s of psLayout, this program can only handle through version 4",
00631                 fileName,  version);
00632             }
00633         for (i=0; i<4; ++i)
00634             {
00635             if (!lineFileNext(lf, &line, &lineSize))
00636                 errAbort("%s severely truncated", fileName);
00637             }
00638         }
00639     else
00640         {
00641         char *s = cloneString(line);
00642         boolean eof = FALSE;
00643         while ((line[0] == '#') && (!eof))
00644             {
00645             freeMem(s);
00646             if (!lineFileNext(lf, &line, &lineSize))
00647                 eof = TRUE;
00648             s = cloneString(line);
00649             }
00650         wordCount = chopLine(s, words);
00651         if ((wordCount < 21 || wordCount > 23 || (words[8][0] != '+' && words[8][0] != '-')) && (!eof))
00652             errAbort("%s is not a psLayout file", fileName);
00653         else
00654             lineFileReuse(lf); 
00655         freeMem(s);
00656         }
00657     }
00658 *retQueryType = qt;
00659 *retTargetType = tt;
00660 *retLf = lf;
00661 }

Here is the call graph for this function:

Here is the caller graph for this function:

void pslxFileOpenWithUniqueMeta ( char *  fileName,
enum gfType retQueryType,
enum gfType retTargetType,
struct lineFile **  retLf,
FILE *  f 
)

Definition at line 670 of file psl.c.

References pslxFileOpenWithMetaConfig(), and TRUE.

Referenced by pslFileOpenWithUniqueMeta().

00673 {
00674 pslxFileOpenWithMetaConfig(fileName, TRUE, retQueryType, retTargetType, retLf, f);
00675 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct psl* pslxLoad ( char **  row  )  [read]

Definition at line 56 of file psl.c.

References pslLoad(), psl::qSequence, sqlStringDynamicArray(), and psl::tSequence.

Referenced by pslNext().

00059 {
00060 struct psl *ret = pslLoad(row);
00061 int retSize;
00062 sqlStringDynamicArray(row[21],&ret->qSequence, &retSize);
00063 sqlStringDynamicArray(row[22],&ret->tSequence, &retSize);
00064 return ret;
00065 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct psl* pslxLoadLm ( char **  row,
struct lm lm 
) [read]

Definition at line 746 of file psl.c.

References psl::blockCount, lm, lmAlloc(), lmCloneString(), pslLoadLm(), psl::qSequence, sqlStringArray(), and psl::tSequence.

00748 {
00749 struct psl *ret = pslLoadLm(row, lm);
00750 ret->qSequence = lmAlloc(lm, sizeof(ret->qSequence[0]) * ret->blockCount);
00751 sqlStringArray(lmCloneString(lm,row[21]),ret->qSequence, ret->blockCount);
00752 ret->tSequence = lmAlloc(lm, sizeof(ret->tSequence[0]) * ret->blockCount);
00753 sqlStringArray(lmCloneString(lm,row[22]),ret->tSequence, ret->blockCount);
00754 return ret;
00755 }

Here is the call graph for this function:

void pslxWriteHead ( FILE *  f,
enum gfType  qType,
enum gfType  tType 
)

Definition at line 502 of file psl.c.

References gfTypeName(), and pslLabelColumns().

00504 {
00505 fprintf(f, "psLayout version 4 %s %s\n", gfTypeName(qType), gfTypeName(tType));
00506 pslLabelColumns(f);
00507 }

Here is the call graph for this function:

static void rcSeqs ( char **  seqs,
unsigned  blockCount,
unsigned *  blockSizes 
) [static]

Definition at line 1097 of file psl.c.

References freeMem(), needLargeMem(), and reverseComplement().

Referenced by pslRc(), and swapRCBlocks().

01100 {
01101 char *buf, *next;
01102 int i, memSz = 0;
01103 
01104 /* get a new memory block for strings */
01105 for (i = 0; i < blockCount; i++)
01106     memSz += blockSizes[i]+1;
01107 next = buf = needLargeMem(memSz);
01108 
01109 /* reverse compliment and copy to new memory block */
01110 for (i = blockCount-1; i >= 0; i--)
01111     {
01112     int len = strlen(seqs[i]);
01113     reverseComplement(seqs[i], len);
01114     memcpy(next, seqs[i], len+1);
01115     next += len+1;
01116     }
01117 
01118 /* swap memory and update pointers */
01119 freeMem(seqs[0]);
01120 seqs[0] = buf;
01121 next = buf;
01122 
01123 for (i = 0; i < blockCount; i++)
01124     {
01125     seqs[i] = next;
01126     next += blockSizes[i]+1;
01127     }
01128 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct hash* readPslToBinKeeper ( char *  sizeFileName,
char *  pslFileName 
) [read]

Definition at line 1663 of file psl.c.

References ArraySize, binKeeperAdd(), binKeeperNew(), hashAdd(), hashLookup(), hashMustFindVal(), lineFileClose(), lineFileNeedNum(), lineFileNextRow(), lineFileOpen(), lineFileRow, name, newHash(), pslLoad(), hash::size, psl::tEnd, psl::tName, TRUE, psl::tStart, and warn().

01665 {
01666 struct binKeeper *bk; 
01667 struct psl *psl;
01668 struct lineFile *sf = lineFileOpen(sizeFileName, TRUE);
01669 struct lineFile *pf = lineFileOpen(pslFileName , TRUE);
01670 struct hash *hash = newHash(0);
01671 char *chromRow[2];
01672 char *row[21] ;
01673 
01674 while (lineFileRow(sf, chromRow))
01675     {
01676     char *name = chromRow[0];
01677     int size = lineFileNeedNum(sf, chromRow, 1);
01678 
01679     if (hashLookup(hash, name) != NULL)
01680         warn("Duplicate %s, ignoring all but first\n", name);
01681     else
01682         {
01683         bk = binKeeperNew(0, size);
01684         assert(size > 1);
01685         hashAdd(hash, name, bk);
01686         }
01687     }
01688 while (lineFileNextRow(pf, row, ArraySize(row)))
01689     {
01690     psl = pslLoad(row);
01691     bk = hashMustFindVal(hash, psl->tName);
01692     binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
01693     }
01694 lineFileClose(&pf);
01695 lineFileClose(&sf);
01696 return hash;
01697 }

Here is the call graph for this function:

static void swapBlocks ( struct psl psl  )  [static]

Definition at line 1176 of file psl.c.

References psl::blockCount, psl::qSequence, psl::qStarts, swapVars, psl::tSequence, and psl::tStarts.

Referenced by pslSwap().

01178 {
01179 int i;
01180 unsigned utmp;
01181 char *stmp; 
01182 for (i = 0; i < psl->blockCount; i++)
01183     {
01184     swapVars(psl->qStarts[i], psl->tStarts[i], utmp);
01185     if (psl->qSequence != NULL)
01186         swapVars(psl->qSequence[i], psl->tSequence[i], stmp);
01187     }
01188 }

Here is the caller graph for this function:

static void swapRCBlocks ( struct psl psl  )  [static]

Definition at line 1190 of file psl.c.

References psl::blockCount, psl::blockSizes, psl::qSequence, psl::qSize, psl::qStarts, rcSeqs(), reverseUnsigned(), swapVars, psl::tSequence, psl::tSize, and psl::tStarts.

Referenced by pslSwap().

01193 {
01194 int i;
01195 unsigned *uatmp;
01196 char **satmp;
01197 reverseUnsigned(psl->tStarts, psl->blockCount);
01198 reverseUnsigned(psl->qStarts, psl->blockCount);
01199 reverseUnsigned(psl->blockSizes, psl->blockCount);
01200 swapVars(psl->tStarts, psl->qStarts, uatmp);
01201 
01202 /* qSize and tSize have already been swapped */
01203 for (i = 0; i < psl->blockCount; i++)
01204     {
01205     psl->qStarts[i] = psl->qSize - (psl->qStarts[i] + psl->blockSizes[i]);
01206     psl->tStarts[i] = psl->tSize - (psl->tStarts[i] + psl->blockSizes[i]);
01207     }
01208 if (psl->qSequence != NULL)
01209     {
01210     /* note: all block sequences are stored in one malloc block, which is
01211      * entry zero */
01212     rcSeqs(psl->qSequence, psl->blockCount, psl->blockSizes);
01213     rcSeqs(psl->tSequence, psl->blockCount, psl->blockSizes);
01214     swapVars(psl->qSequence, psl->tSequence, satmp);
01215     }
01216 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void trimAlignment ( struct psl psl,
char **  qStringPtr,
char **  tStringPtr,
int *  aliSizePtr 
) [static]

Definition at line 1741 of file psl.c.

References countInitialChars(), countNonInsert(), countTerminalChars(), max, psl::qEnd, psl::qStart, psl::tEnd, and psl::tStart.

Referenced by pslFromAlign().

01744 {
01745 char* qString = *qStringPtr;
01746 char* tString = *tStringPtr;
01747 int aliSize = *aliSizePtr;
01748 int qStartInsert = countInitialChars(qString, '-');
01749 int tStartInsert = countInitialChars(tString, '-');
01750 int qEndInsert = countTerminalChars(qString, '-');
01751 int tEndInsert = countTerminalChars(tString, '-');
01752 int startInsert = max(qStartInsert, tStartInsert);
01753 int endInsert = max(qEndInsert, tEndInsert);
01754 int qNonCount, tNonCount;
01755 
01756 if (startInsert > 0)
01757     {
01758     qNonCount = countNonInsert(qString, startInsert);
01759     tNonCount = countNonInsert(tString, startInsert);
01760     qString += startInsert;
01761     tString += startInsert;
01762     aliSize -= startInsert;
01763     psl->qStart += qNonCount;
01764     psl->tStart += tNonCount;
01765     }
01766 if (endInsert > 0)
01767     {
01768     aliSize -= endInsert;
01769     qNonCount = countNonInsert(qString+aliSize, endInsert);
01770     tNonCount = countNonInsert(tString+aliSize, endInsert);
01771     qString[aliSize] = 0;
01772     tString[aliSize] = 0;
01773     psl->qEnd -= qNonCount;
01774     psl->tEnd -= tNonCount;
01775     }
01776 *qStringPtr = qString;
01777 *tStringPtr = tString;
01778 *aliSizePtr = aliSize;
01779 /* recursive call to handle double gap */
01780 if (startInsert > 0 || endInsert > 0)
01781     trimAlignment(psl, qStringPtr, tStringPtr, aliSizePtr);
01782 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

char* createString [static]

Initial value:

 
"CREATE TABLE %s (\n"
    "%s"                                
    "matches int unsigned not null,     # Number of bases that match that aren't repeats\n"
    "misMatches int unsigned not null,  # Number of bases that don't match\n"
    "repMatches int unsigned not null,  # Number of bases that match but are part of repeats\n"
    "nCount int unsigned not null,      # Number of 'N' bases\n"
    "qNumInsert int unsigned not null,  # Number of inserts in query\n"
    "qBaseInsert int unsigned not null, # Number of bases inserted in query\n"
    "tNumInsert int unsigned not null,  # Number of inserts in target\n"
    "tBaseInsert int unsigned not null, # Number of bases inserted in target\n"
    "strand char(2) not null,   # + or - for strand.  First character is query, second is target.\n"
    "qName varchar(255) not null,       # Query sequence name\n"
    "qSize int unsigned not null,       # Query sequence size\n"
    "qStart int unsigned not null,      # Alignment start position in query\n"
    "qEnd int unsigned not null,        # Alignment end position in query\n"
    "tName varchar(255) not null,       # Target sequence name\n"
    "tSize int unsigned not null,       # Target sequence size\n"
    "tStart int unsigned not null,      # Alignment start position in target\n"
    "tEnd int unsigned not null,        # Alignment end position in target\n"
    "blockCount int unsigned not null,  # Number of blocks in alignment\n"
    "blockSizes longblob not null,      # Size of each block\n"
    "qStarts longblob not null, # Start of each block in query.\n"
    "tStarts longblob not null, # Start of each block in target.\n"

Definition at line 24 of file psl.c.

Referenced by pslGetCreateSql().

char* indexString [static]

Initial value:

 
          "#Indices\n"
    "%s"                            
    "INDEX(qName(12))\n"
")\n"

Definition at line 49 of file psl.c.

Referenced by pslGetCreateSql().

char const rcsid[] = "$Id: psl.c,v 1.76 2007/03/04 20:24:19 kent Exp $" [static]

Definition at line 22 of file psl.c.


Generated on Tue Dec 25 20:12:56 2007 for blat by  doxygen 1.5.2