lib/chainToPsl.c

Go to the documentation of this file.
00001 /* chainToPsl - convert between chains and psl.  Both of these
00002  * are alignment formats that can handle gaps in both strands
00003  * and do not include the sequence itself. */
00004 
00005 #include "common.h"
00006 #include "linefile.h"
00007 #include "hash.h"
00008 #include "psl.h"
00009 #include "chain.h"
00010 
00011 static char const rcsid[] = "$Id: chainToPsl.c,v 1.5 2005/04/10 14:41:21 markd Exp $";
00012 
00013 
00014 struct psl *chainToPsl(struct chain *chain)
00015 /* chainToPsl - convert chain to psl.  This does not fill in
00016  * the match, repMatch, mismatch, and N fields since it needs
00017  * the sequence for that.  It does fill in the rest though. */
00018 {
00019 struct psl *psl;
00020 int blockCount, i;
00021 struct cBlock *b, *nextB;
00022 
00023 blockCount = slCount(chain->blockList);
00024 AllocVar(psl);
00025 AllocArray(psl->blockSizes, blockCount);
00026 AllocArray(psl->qStarts, blockCount);
00027 AllocArray(psl->tStarts, blockCount);
00028 psl->strand[0] = chain->qStrand;
00029 psl->qName = cloneString(chain->qName);
00030 psl->qSize = chain->qSize;
00031 if (chain->qStrand == '-')
00032     {
00033     psl->qStart = chain->qSize - chain->qEnd;
00034     psl->qEnd = chain->qSize - chain->qStart;
00035     }
00036 else
00037     {
00038     psl->qStart = chain->qStart;
00039     psl->qEnd = chain->qEnd;
00040     }
00041 psl->tName = cloneString(chain->tName);
00042 psl->tSize = chain->tSize;
00043 psl->tStart = chain->tStart;
00044 psl->tEnd = chain->tEnd;
00045 psl->blockCount = blockCount;
00046 for (i=0, b=chain->blockList; i < blockCount; ++i, b = nextB)
00047     {
00048     nextB = b->next;
00049     psl->tStarts[i] = b->tStart;
00050     psl->qStarts[i] = b->qStart;
00051     psl->blockSizes[i] = b->qEnd - b->qStart;
00052     if (nextB != NULL)
00053         {
00054         int qGap = nextB->qStart - b->qEnd;
00055         int tGap = nextB->tStart - b->tEnd;
00056         if (qGap != 0)
00057             {
00058             psl->qBaseInsert += qGap;
00059             psl->qNumInsert += 1;
00060             }
00061         if (tGap != 0)
00062             {
00063             psl->tBaseInsert += tGap;
00064             psl->tNumInsert += 1;
00065             }
00066         }
00067     }
00068 return psl;
00069 }
00070 
00071 static void fillInMatchEtc(struct chain *chain, 
00072         struct dnaSeq *query, struct dnaSeq *target, struct psl *psl)
00073 /* Fill in psl->match,mismatch,repMatch, and nCount fields.
00074  * Assumes that query and target are on correct strands already. */
00075 {
00076 struct cBlock *block;
00077 unsigned match = 0, misMatch=0, repMatch=0, nCount=0;
00078 for (block = chain->blockList; block != NULL; block = block->next)
00079     {
00080     DNA *qDna = query->dna + block->qStart;
00081     DNA *tDna = target->dna + block->tStart;
00082     int i, size = block->qEnd - block->qStart;
00083     for (i=0; i<size; ++i)
00084         {
00085         DNA q,t;
00086         int qv, tv;
00087         q = qDna[i];
00088         t = tDna[i];
00089         qv = ntVal[(int)q];
00090         tv = ntVal[(int)t];
00091         if (qv < 0 || tv < 0)
00092             ++nCount;
00093         else if (qv == tv)
00094             {
00095             if (isupper(q) && isupper(t))
00096                 ++match;
00097             else
00098                 ++repMatch;
00099             }
00100         else 
00101             ++misMatch;
00102         }
00103     }
00104 psl->match = match;
00105 psl->misMatch = misMatch;
00106 psl->repMatch = repMatch;
00107 psl->nCount = nCount;
00108 }
00109 
00110 struct psl *chainToFullPsl(struct chain *chain, 
00111         struct dnaSeq *query,   /* Forward query sequence. */
00112         struct dnaSeq *rQuery,  /* Reverse complemented query sequence. */
00113         struct dnaSeq *target)
00114 /* Convert chainList to pslList, filling in matches, N's etc. */
00115 {
00116 struct psl *psl = chainToPsl(chain);
00117 struct dnaSeq *qSeq = (chain->qStrand == '-' ? rQuery : query);
00118 fillInMatchEtc(chain, qSeq, target, psl);
00119 return psl;
00120 }
00121 
00122 
00123 

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2