00001
00002
00003
00004
00005 #include "common.h"
00006 #include "linefile.h"
00007 #include "hash.h"
00008 #include "psl.h"
00009 #include "chain.h"
00010
00011 static char const rcsid[] = "$Id: chainToPsl.c,v 1.5 2005/04/10 14:41:21 markd Exp $";
00012
00013
00014 struct psl *chainToPsl(struct chain *chain)
00015
00016
00017
00018 {
00019 struct psl *psl;
00020 int blockCount, i;
00021 struct cBlock *b, *nextB;
00022
00023 blockCount = slCount(chain->blockList);
00024 AllocVar(psl);
00025 AllocArray(psl->blockSizes, blockCount);
00026 AllocArray(psl->qStarts, blockCount);
00027 AllocArray(psl->tStarts, blockCount);
00028 psl->strand[0] = chain->qStrand;
00029 psl->qName = cloneString(chain->qName);
00030 psl->qSize = chain->qSize;
00031 if (chain->qStrand == '-')
00032 {
00033 psl->qStart = chain->qSize - chain->qEnd;
00034 psl->qEnd = chain->qSize - chain->qStart;
00035 }
00036 else
00037 {
00038 psl->qStart = chain->qStart;
00039 psl->qEnd = chain->qEnd;
00040 }
00041 psl->tName = cloneString(chain->tName);
00042 psl->tSize = chain->tSize;
00043 psl->tStart = chain->tStart;
00044 psl->tEnd = chain->tEnd;
00045 psl->blockCount = blockCount;
00046 for (i=0, b=chain->blockList; i < blockCount; ++i, b = nextB)
00047 {
00048 nextB = b->next;
00049 psl->tStarts[i] = b->tStart;
00050 psl->qStarts[i] = b->qStart;
00051 psl->blockSizes[i] = b->qEnd - b->qStart;
00052 if (nextB != NULL)
00053 {
00054 int qGap = nextB->qStart - b->qEnd;
00055 int tGap = nextB->tStart - b->tEnd;
00056 if (qGap != 0)
00057 {
00058 psl->qBaseInsert += qGap;
00059 psl->qNumInsert += 1;
00060 }
00061 if (tGap != 0)
00062 {
00063 psl->tBaseInsert += tGap;
00064 psl->tNumInsert += 1;
00065 }
00066 }
00067 }
00068 return psl;
00069 }
00070
00071 static void fillInMatchEtc(struct chain *chain,
00072 struct dnaSeq *query, struct dnaSeq *target, struct psl *psl)
00073
00074
00075 {
00076 struct cBlock *block;
00077 unsigned match = 0, misMatch=0, repMatch=0, nCount=0;
00078 for (block = chain->blockList; block != NULL; block = block->next)
00079 {
00080 DNA *qDna = query->dna + block->qStart;
00081 DNA *tDna = target->dna + block->tStart;
00082 int i, size = block->qEnd - block->qStart;
00083 for (i=0; i<size; ++i)
00084 {
00085 DNA q,t;
00086 int qv, tv;
00087 q = qDna[i];
00088 t = tDna[i];
00089 qv = ntVal[(int)q];
00090 tv = ntVal[(int)t];
00091 if (qv < 0 || tv < 0)
00092 ++nCount;
00093 else if (qv == tv)
00094 {
00095 if (isupper(q) && isupper(t))
00096 ++match;
00097 else
00098 ++repMatch;
00099 }
00100 else
00101 ++misMatch;
00102 }
00103 }
00104 psl->match = match;
00105 psl->misMatch = misMatch;
00106 psl->repMatch = repMatch;
00107 psl->nCount = nCount;
00108 }
00109
00110 struct psl *chainToFullPsl(struct chain *chain,
00111 struct dnaSeq *query,
00112 struct dnaSeq *rQuery,
00113 struct dnaSeq *target)
00114
00115 {
00116 struct psl *psl = chainToPsl(chain);
00117 struct dnaSeq *qSeq = (chain->qStrand == '-' ? rQuery : query);
00118 fillInMatchEtc(chain, qSeq, target, psl);
00119 return psl;
00120 }
00121
00122
00123