jkOwnLib/xenbig.c File Reference

#include "common.h"
#include "dlist.h"
#include "hash.h"
#include "portable.h"
#include "dnautil.h"
#include "nt4.h"
#include "crudeali.h"
#include "wormdna.h"
#include "xenalign.h"

Include dependency graph for xenbig.c:

Go to the source code of this file.

Data Structures

struct  contig
struct  cluster

Defines

#define maxLineSize   50

Functions

static void freeContig (struct contig **pContig)
static void freeContigList (struct dlList **pContigList)
static void checkComplete (struct dlList *contigList)
static int cmpContigQueryStart (const void *va, const void *vb)
static void writeContig (struct contig *contig, FILE *out, boolean compactOutput, boolean newFormat)
static boolean contigsOverlap (struct contig *a, struct contig *b, int *retQueryOverlap, int *retTargetOverlap)
static int findSymIx (char *qSym, int qIx, int *retSymIx)
static boolean mergeIntoAdjacentClusters (struct dlList *clusterList, struct dlNode *contigNode)
static void maxAlignedOverlap (char *a1, char *a2, char *b1, char *b2, int size, int *retStart, int *retOverlapSize)
static void findMaxOverlap (char *a1, char *a2, int aSize, char *b1, char *b2, int bSize, int *retAstart, int *retBstart, int *retOverlapSize)
static struct contigforwardMergeTwo (struct contig *a, struct contig *b)
static void rcContigOffsets (struct contig *a, int revOff)
static void rcQueryOffsetsAroundSeg (struct contig *a, int segStart, int segEnd)
static struct contigmergeTwo (struct contig *a, struct contig *b)
static void mergeWithinCluster (struct cluster *cluster)
static boolean clusterEncompassedBefore (struct dlList *clusterList, struct cluster *cluster)
static void clusterRemoveEncompassed (struct dlList *clusterList)
static boolean contigEncompassedBefore (struct dlList *contigList, struct contig *contig)
static void contigRemoveEncompassed (struct dlList *contigList)
static int milliMatchScore (struct contig *contig)
static void mergeContigs (struct dlList *contigList, int stitchMinScore)
static void finishContigs (struct dlList **pContigList, FILE *out, int stitchMinScore, boolean compactOutput, boolean newFormat)
static int countNonGap (char *s, int size)
static void xStitch (char *inName, FILE *out, int stitchMinScore, boolean compactOutput, boolean newFormat)
void xenStitch (char *inName, FILE *out, int stitchMinScore, boolean compactOutput)
void xenStitcher (char *inName, FILE *out, int stitchMinScore, boolean compactOutput)
void xenAlignBig (DNA *query, int qSize, DNA *target, int tSize, FILE *f, boolean forHtml)
void xenAlignWorm (DNA *query, int qSize, FILE *f, boolean forHtml)

Variables

static char const rcsid [] = "$Id: xenbig.c,v 1.3 2006/03/15 18:36:16 angie Exp $"


Define Documentation

#define maxLineSize   50

Referenced by writeContig().


Function Documentation

static void checkComplete ( struct dlList contigList  )  [static]

Definition at line 68 of file xenbig.c.

References errAbort(), dlList::head, contig::isComplete, dlNode::next, contig::qEnd, contig::qStart, contig::qStrand, contig::query, contig::target, contig::tEnd, contig::tStart, contig::tStrand, and dlNode::val.

Referenced by finishContigs().

00070 {
00071 struct dlNode *node;
00072 struct contig *contig;
00073 
00074 for (node = contigList->head; node->next != NULL; node = node->next)
00075     {
00076     contig = node->val;
00077     if (!contig->isComplete)
00078         {
00079         errAbort("Contig %s:%d-%d %c %s:%d-%d %c isn't complete",
00080             contig->query, contig->qStart, contig->qEnd, contig->qStrand,
00081             contig->target, contig->tStart, contig->tEnd, contig->tStrand);
00082         }
00083     }
00084 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean clusterEncompassedBefore ( struct dlList clusterList,
struct cluster cluster 
) [static]

Definition at line 602 of file xenbig.c.

References FALSE, dlList::head, dlNode::next, cluster::node, cluster::qEnd, cluster::qStart, cluster::qStrand, cluster::query, sameString, cluster::target, cluster::tEnd, TRUE, cluster::tStart, cluster::tStrand, and dlNode::val.

Referenced by clusterRemoveEncompassed().

00605 {
00606 struct dlNode *node;
00607 struct cluster *c;
00608 
00609 for (node = clusterList->head; node->next != NULL; node = node->next)
00610     {
00611     c = node->val;
00612     if (c == cluster)
00613         return FALSE;
00614     if (c->qStart <= cluster->qStart && c->qEnd >= cluster->qEnd &&
00615         c->tStart <= cluster->tStart && c->tEnd >= cluster->tEnd &&
00616         c->qStrand == cluster->qStrand &&
00617         c->tStrand == cluster->tStrand &&
00618         sameString(c->query, cluster->query) &&
00619         sameString(c->target, cluster->target) )
00620         {
00621         return TRUE;
00622         }            
00623     }
00624 assert(FALSE);
00625 return FALSE;
00626 }

Here is the caller graph for this function:

static void clusterRemoveEncompassed ( struct dlList clusterList  )  [static]

Definition at line 628 of file xenbig.c.

References clusterEncompassedBefore(), cluster::contigs, dlRemove(), freeContigList(), freeMem(), dlList::head, dlNode::next, cluster::node, and dlNode::val.

Referenced by mergeContigs().

00631 {
00632 struct dlNode *node, *next;
00633 struct cluster *cluster;
00634 
00635 for (node = clusterList->head; node->next != NULL; node = next)
00636     {
00637     next = node->next;
00638     cluster = node->val;
00639     if (clusterEncompassedBefore(clusterList, cluster))
00640         {
00641         dlRemove(node);
00642         freeMem(node);
00643         freeContigList(&cluster->contigs);
00644         freeMem(cluster);
00645         }
00646     }
00647 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int cmpContigQueryStart ( const void *  va,
const void *  vb 
) [static]

Definition at line 86 of file xenbig.c.

References contig::qEnd, and contig::qStart.

Referenced by mergeContigs().

00090 {
00091 struct contig **pA = (struct contig **)va;
00092 struct contig **pB = (struct contig **)vb;
00093 struct contig *a = *pA, *b = *pB;
00094 int dif;
00095 
00096 dif = a->qStart - b->qStart;
00097 if (dif == 0)
00098    //dif = a->tStart - b->tStart;
00099    dif = b->qEnd - a->qEnd;
00100 return dif;
00101 }

Here is the caller graph for this function:

static boolean contigEncompassedBefore ( struct dlList contigList,
struct contig contig 
) [static]

Definition at line 649 of file xenbig.c.

References FALSE, dlList::head, dlNode::next, contig::qEnd, contig::qStart, contig::qStrand, contig::query, sameString, contig::target, contig::tEnd, TRUE, contig::tStart, contig::tStrand, and dlNode::val.

Referenced by contigRemoveEncompassed().

00652 {
00653 struct dlNode *node;
00654 struct contig *c;
00655 
00656 for (node = contigList->head; node->next != NULL; node = node->next)
00657     {
00658     c = node->val;
00659     if (c == contig)
00660         return FALSE;
00661     if (c->qStart <= contig->qStart && c->qEnd >= contig->qEnd &&
00662         c->tStart <= contig->tStart && c->tEnd >= contig->tEnd &&
00663         c->qStrand == contig->qStrand &&
00664         c->tStrand == contig->tStrand &&
00665         sameString(c->query, contig->query) &&
00666         sameString(c->target, contig->target) )
00667         {
00668         return TRUE;
00669         }            
00670     }
00671 assert(FALSE);
00672 return FALSE;
00673 }

Here is the caller graph for this function:

static void contigRemoveEncompassed ( struct dlList contigList  )  [static]

Definition at line 675 of file xenbig.c.

References contigEncompassedBefore(), dlRemove(), freeContig(), freeMem(), dlList::head, dlNode::next, and dlNode::val.

Referenced by mergeContigs().

00678 {
00679 struct dlNode *node, *next;
00680 struct contig *contig;
00681 
00682 for (node = contigList->head; node->next != NULL; node = next)
00683     {
00684     next = node->next;
00685     contig = node->val;
00686     if (contigEncompassedBefore(contigList, contig))
00687         {
00688         dlRemove(node);
00689         freeMem(node);
00690         freeContig(&contig);
00691         }
00692     }
00693 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean contigsOverlap ( struct contig a,
struct contig b,
int *  retQueryOverlap,
int *  retTargetOverlap 
) [static]

Definition at line 219 of file xenbig.c.

References FALSE, max, min, contig::qEnd, contig::qStart, contig::tEnd, TRUE, and contig::tStart.

Referenced by mergeWithinCluster().

00222 {
00223 int overlap;
00224 int start, end;
00225 
00226 /* Check overlap in query. */
00227 start = max(a->qStart, b->qStart);
00228 end = min(a->qEnd, b->qEnd);
00229 overlap = end - start;
00230 if (overlap > 0)
00231     *retQueryOverlap = overlap;
00232 else
00233     return FALSE;
00234 
00235 /* Check overlap in target. */
00236 start = max(a->tStart, b->tStart);
00237 end = min(a->tEnd, b->tEnd);
00238 overlap = end - start;
00239 if (overlap > 0)
00240     *retTargetOverlap = overlap;
00241 else
00242     return FALSE;
00243 return TRUE;
00244 }

Here is the caller graph for this function:

static int countNonGap ( char *  s,
int  size 
) [static]

Definition at line 836 of file xenbig.c.

Referenced by xStitch().

00838 {
00839 int count = 0;
00840 while (--size >= 0)
00841     if (*s++ != '-') ++count;
00842 return count;
00843 }

Here is the caller graph for this function:

static void findMaxOverlap ( char *  a1,
char *  a2,
int  aSize,
char *  b1,
char *  b2,
int  bSize,
int *  retAstart,
int *  retBstart,
int *  retOverlapSize 
) [static]

Definition at line 350 of file xenbig.c.

References maxAlignedOverlap(), and min.

Referenced by forwardMergeTwo().

00354 {
00355 int i;
00356 int maxOverlap = -1;
00357 int maxAstart = 0;
00358 int maxBstart = 0;
00359 int overlap;
00360 int minSize = min(aSize, bSize);
00361 int goodEnough = minSize-1;
00362 int start;
00363 
00364 for (i=0; i<bSize; ++i)
00365     {
00366     int size = bSize-i;
00367     if (size > aSize)
00368         size = aSize;
00369     maxAlignedOverlap(a1, a2, b1+i, b2+i, size, &start, &overlap);
00370     if (overlap > maxOverlap)
00371         {
00372         maxOverlap = overlap;
00373         maxAstart = start;
00374         maxBstart = i+start;
00375         if (maxOverlap >= goodEnough)
00376             goto RETURN_VALS;
00377         }
00378     }
00379 for (i=1; i<aSize; ++i)
00380     {
00381     int size = aSize-i;
00382     if (size > bSize)
00383         size = bSize;
00384     maxAlignedOverlap(a1+i, a2+i, b1, b2, size, &start, &overlap);
00385     if (overlap > maxOverlap)
00386         {
00387         maxOverlap = overlap;
00388         maxAstart = i+start;
00389         maxBstart = start;
00390         if (maxOverlap >= goodEnough)
00391             goto RETURN_VALS;
00392         }
00393     }
00394 RETURN_VALS:
00395     {
00396     *retAstart = maxAstart;
00397     *retBstart = maxBstart;
00398     *retOverlapSize = maxOverlap;
00399     }
00400 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int findSymIx ( char *  qSym,
int  qIx,
int *  retSymIx 
) [static]

Definition at line 246 of file xenbig.c.

References FALSE, TRUE, and warn().

Referenced by forwardMergeTwo().

00248 {
00249 char q;
00250 int symIx;
00251 int qCount = 0;
00252 
00253 for (symIx = 0; ; ++symIx)
00254     {
00255     if (qCount == qIx)
00256         {
00257         *retSymIx = symIx;
00258         return TRUE;
00259         }
00260     if ((q = *qSym++) == 0)
00261         {
00262         warn("findSymIx gone past end");
00263         return FALSE;
00264         }
00265     if (q != '-')
00266         ++qCount;
00267     }
00268 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void finishContigs ( struct dlList **  pContigList,
FILE *  out,
int  stitchMinScore,
boolean  compactOutput,
boolean  newFormat 
) [static]

Definition at line 817 of file xenbig.c.

References checkComplete(), freeContigList(), mergeContigs(), newDlList(), dlNode::next, dlNode::val, and writeContig().

Referenced by xStitch().

00821 {
00822 struct contig *contig;
00823 struct dlNode *node;
00824 
00825 checkComplete(*pContigList);
00826 mergeContigs(*pContigList, stitchMinScore);
00827 for (node = (*pContigList)->head; node->next != NULL; node = node->next)
00828     {
00829     contig = node->val;
00830     writeContig(contig, out, compactOutput, newFormat);
00831     }
00832 freeContigList(pContigList);
00833 *pContigList = newDlList();
00834 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct contig* forwardMergeTwo ( struct contig a,
struct contig b 
) [static, read]

Definition at line 402 of file xenbig.c.

References AllocVar, cloneString(), findMaxOverlap(), findSymIx(), contig::hSym, max, min, needMem(), contig::qEnd, contig::qStart, contig::qStrand, contig::qSym, contig::query, contig::queryFile, contig::symCount, contig::target, contig::tEnd, contig::tStart, contig::tStrand, and contig::tSym.

Referenced by mergeTwo().

00405 {
00406 int aSymOverlapStart, aSymOverlapEnd, aSymOverlapSize;
00407 int bSymOverlapStart, bSymOverlapEnd, bSymOverlapSize;
00408 int aqoStart, bqoStart;
00409 int overlapSize, halfOverlapSize;
00410 int aCopyStart, aCopyEnd, bCopyStart, bCopyEnd;
00411 int newSymCount;
00412 struct contig *c;
00413 int aCopySize, bCopySize;
00414 int qOverlap;
00415 
00416 /* Find overlapping area in symbol coordinates. */
00417 qOverlap = b->qStart - a->qStart;
00418 if (qOverlap < 0 || qOverlap > a->qEnd - a->qStart)
00419     return NULL;
00420 if (!findSymIx(a->qSym, b->qStart - a->qStart, &aSymOverlapStart))
00421     return NULL;
00422 aSymOverlapEnd = a->symCount;
00423 aSymOverlapSize = aSymOverlapEnd - aSymOverlapStart;
00424 bSymOverlapStart = 0;
00425 if (!findSymIx(b->qSym, a->qEnd - b->qStart, &bSymOverlapEnd))
00426     return NULL;
00427 bSymOverlapSize = bSymOverlapEnd - bSymOverlapStart;
00428 
00429 /* Find subset of overlapping area that is identical
00430  * in source and query symbols. */
00431 findMaxOverlap(a->qSym + aSymOverlapStart, 
00432     a->tSym + aSymOverlapStart, aSymOverlapSize, 
00433     b->qSym + bSymOverlapStart,
00434     b->tSym + bSymOverlapStart, bSymOverlapSize, 
00435     &aqoStart, &bqoStart, &overlapSize);
00436 if (overlapSize < 15)
00437     return NULL;
00438 aSymOverlapStart += aqoStart;
00439 bSymOverlapStart += bqoStart;
00440 halfOverlapSize = overlapSize/2;
00441 
00442 /* Figure out what areas will go into new contig, and how big
00443  * it will be. */
00444 aCopyStart = 0;
00445 aCopyEnd = aSymOverlapStart + halfOverlapSize;
00446 bCopyStart = bSymOverlapStart + halfOverlapSize;
00447 bCopyEnd = b->symCount;
00448 newSymCount = aCopyEnd - aCopyStart + bCopyEnd - bCopyStart;
00449 
00450 /* Allocate new contig with set of symbols big enough to hold
00451  * merged contig. */
00452 AllocVar(c);
00453 c->query = a->query;
00454 c->queryFile = a->queryFile;
00455 c->qStart = min(a->qStart, b->qStart);
00456 c->qEnd = max(a->qEnd, b->qEnd);
00457 c->qStrand = a->qStrand;
00458 c->target = cloneString(a->target);
00459 c->tStart = a->tStart;
00460 c->tEnd = b->tEnd;
00461 c->tStrand = a->tStrand;
00462 c->qSym = needMem(newSymCount);
00463 c->tSym = needMem(newSymCount);
00464 c->hSym = needMem(newSymCount);
00465 c->symCount = newSymCount;
00466 
00467 /* Copy symbols from contig a up to half way through
00468  * overlapping area */
00469 aCopySize = aCopyEnd - aCopyStart;
00470 bCopySize = bCopyEnd - bCopyStart;
00471 memcpy(c->qSym, a->qSym+aCopyStart, aCopySize);
00472 memcpy(c->tSym, a->tSym+aCopyStart, aCopySize);
00473 memcpy(c->hSym, a->hSym+aCopyStart, aCopySize);
00474 
00475 /* Append symbols from contig b from half way through
00476  * overlapping area to end. */
00477 memcpy(c->qSym+aCopySize, b->qSym+bCopyStart, bCopySize);
00478 memcpy(c->tSym+aCopySize, b->tSym+bCopyStart, bCopySize);
00479 memcpy(c->hSym+aCopySize, b->hSym+bCopyStart, bCopySize);
00480 
00481 return c;
00482 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void freeContig ( struct contig **  pContig  )  [static]

Definition at line 39 of file xenbig.c.

References freeMem(), freez(), contig::hSym, contig::name, contig::qSym, contig::target, and contig::tSym.

Referenced by contigRemoveEncompassed(), freeContigList(), and mergeWithinCluster().

00041 {
00042 struct contig *contig = *pContig;
00043 if (contig != NULL)
00044     {
00045     freeMem(contig->name);
00046     freeMem(contig->target);
00047     freeMem(contig->qSym);
00048     freeMem(contig->tSym);
00049     freeMem(contig->hSym);
00050     freez(pContig);
00051     }
00052 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void freeContigList ( struct dlList **  pContigList  )  [static]

Definition at line 54 of file xenbig.c.

References freeContig(), freeDlList(), dlNode::next, and dlNode::val.

Referenced by clusterRemoveEncompassed(), finishContigs(), and mergeContigs().

00056 {
00057 struct dlNode *node;
00058 struct contig *contig;
00059 
00060 for (node = (*pContigList)->head; node->next != NULL; node = node->next)
00061     {
00062     contig = node->val;
00063     freeContig(&contig);
00064     }
00065 freeDlList(pContigList);
00066 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void maxAlignedOverlap ( char *  a1,
char *  a2,
char *  b1,
char *  b2,
int  size,
int *  retStart,
int *  retOverlapSize 
) [static]

Definition at line 318 of file xenbig.c.

Referenced by findMaxOverlap().

00322 {
00323 int maxStart = -1;
00324 int maxLen = -1;
00325 int curStart = 0;
00326 int curLen = 0;
00327 int i;
00328 
00329 for (i=0; i<size; ++i)
00330     {
00331     if (a1[i] == b1[i] && a2[i] == b2[i])
00332         {
00333         curLen += 1;
00334         if (curLen > maxLen)
00335             {
00336             maxLen = curLen;
00337             maxStart = curStart;
00338             }
00339         }
00340     else
00341         {
00342         curLen = 0;
00343         curStart = i+1;
00344         }
00345     }
00346 *retStart = maxStart;
00347 *retOverlapSize = maxLen;
00348 }

Here is the caller graph for this function:

static void mergeContigs ( struct dlList contigList,
int  stitchMinScore 
) [static]

Definition at line 712 of file xenbig.c.

References AllocVar, cloneString(), clusterRemoveEncompassed(), cmpContigQueryStart(), contigRemoveEncompassed(), cluster::contigs, dlAddTail(), dlAddValTail(), dlCount(), dlPopHead(), dlRemove(), dlSort(), freeContigList(), freeDlList(), freeDlListAndVals(), freeMem(), dlList::head, mergeIntoAdjacentClusters(), mergeWithinCluster(), milliMatchScore(), contig::name, newDlList(), dlNode::next, contig::qEnd, contig::qEndOffset, contig::qOffset, contig::qStart, contig::qStrand, contig::query, cluster::score, contig::score, contig::target, contig::tEnd, contig::tEndOffset, contig::tOffset, contig::tStart, contig::tStrand, and dlNode::val.

Referenced by finishContigs().

00715 {
00716 struct dlList *clusterList;
00717 struct dlNode *contigNode, *clusterNode;
00718 struct dlNode *nextClusterNode;
00719 struct cluster *cluster;
00720 struct contig *contig;
00721 int contigCount;
00722 int startCount = 0;
00723 
00724 /* Make sure there's really something to do, so we don't have
00725  * to NULL check so much in the rest of this routine. */
00726 contigCount = dlCount(contigList);
00727 if (contigCount < 1)
00728     return;
00729 
00730 clusterList = newDlList();
00731 
00732 /* Group together contigs that overlap in both query and target
00733    into clusters. */
00734 while ((contigNode = dlPopHead(contigList)) != NULL)
00735     {
00736     if (!mergeIntoAdjacentClusters(clusterList, contigNode))
00737         {
00738         AllocVar(cluster);
00739         contig = contigNode->val;
00740         cluster->contigs = newDlList();
00741         dlAddTail(cluster->contigs, contigNode);
00742         cluster->query = contig->query;
00743         cluster->qStart = contig->qStart;
00744         cluster->qEnd = contig->qEnd;
00745         cluster->qOffset = contig->qOffset;
00746         cluster->qEndOffset = contig->qEndOffset;
00747         cluster->qStrand = contig->qStrand;
00748         cluster->target = contig->target;
00749         cluster->tStart = contig->tStart;
00750         cluster->tEnd = contig->tEnd;
00751         cluster->tOffset = contig->tOffset;
00752         cluster->tEndOffset = contig->tEndOffset;
00753         cluster->tStrand = contig->tStrand;
00754         cluster->score = contig->score;
00755         dlAddValTail(clusterList, cluster);
00756         }
00757     }
00758 
00759 /* Weed out clusters that are too weak. */
00760 for (clusterNode = clusterList->head; clusterNode->next != NULL; 
00761     clusterNode = nextClusterNode)
00762     {
00763     nextClusterNode = clusterNode->next;
00764     cluster = clusterNode->val;
00765     if (cluster->score < stitchMinScore)
00766         {
00767         dlRemove(clusterNode);
00768         freeContigList(&cluster->contigs);
00769         freeMem(cluster);
00770         freeMem(clusterNode);
00771         }
00772     }
00773 
00774 clusterRemoveEncompassed(clusterList);
00775 
00776 /* Merge contigs within cluster into a single contig, and move that contig
00777  * back onto cluster list. */
00778 for (clusterNode = clusterList->head; clusterNode->next != NULL; clusterNode = clusterNode->next)
00779     {
00780     cluster = clusterNode->val;
00781     mergeWithinCluster(cluster);
00782     while ((contigNode = dlPopHead(cluster->contigs)) != NULL)
00783         dlAddTail(contigList, contigNode);
00784     freeDlList(&cluster->contigs);
00785     }
00786 
00787 dlSort(contigList, cmpContigQueryStart);
00788 contigRemoveEncompassed(contigList);
00789 
00790 for (contigNode = contigList->head; contigNode->next != NULL; contigNode = contigNode->next)
00791     {
00792     char nameBuf[128];
00793     char *noDir;
00794     char *noDot;
00795     contig = contigNode->val;
00796     /* Get rid of directory and suffix parts of file name to use as
00797      * base for contig name. */
00798     noDir = strrchr(contig->query, '\\');
00799     if (noDir == NULL)
00800         noDir = strrchr(contig->query, '/');
00801     if (noDir == NULL)
00802         noDir = contig->query;
00803     else
00804         noDir += 1;
00805     noDot = strrchr(noDir, '.');
00806     if (noDot != NULL)
00807         *noDot = 0;
00808     sprintf(nameBuf, "%s.c%d", noDir, ++startCount);    
00809     contig->name = cloneString(nameBuf);
00810     contig->score = milliMatchScore(contig);
00811     }
00812 
00813 /* Free up now gutted cluster list. */
00814 freeDlListAndVals(&clusterList);
00815 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean mergeIntoAdjacentClusters ( struct dlList clusterList,
struct dlNode contigNode 
) [static]

Definition at line 270 of file xenbig.c.

References cluster::contigs, dlAddTail(), FALSE, dlList::head, dlNode::next, contig::qEndOffset, cluster::qEndOffset, contig::qOffset, cluster::qStrand, contig::qStrand, cluster::query, contig::query, sameString, contig::score, cluster::score, cluster::target, contig::target, cluster::tEndOffset, contig::tEndOffset, cluster::tOffset, contig::tOffset, TRUE, cluster::tStrand, contig::tStrand, and dlNode::val.

Referenced by mergeContigs().

00273 {
00274 struct contig *contig = contigNode->val;  /* The contig we're trying to fold in. */
00275 struct dlNode *clusterNode;               /* Node of a single cluster. */
00276 struct cluster *cluster;                  /* A cluster. */
00277 
00278 for (clusterNode = clusterList->head; clusterNode->next != NULL; clusterNode = clusterNode->next)
00279     {
00280     cluster = clusterNode->val;
00281     if ((contig->qOffset == cluster->qEndOffset - 1000 ||
00282         (contig->qEndOffset == cluster->qEndOffset &&
00283          contig->tEndOffset == cluster->tEndOffset)) &&
00284         contig->qStrand == cluster->qStrand &&
00285         contig->tStrand == cluster->tStrand &&
00286         sameString(contig->target, cluster->target) &&
00287         sameString(contig->query, cluster->query))
00288         {
00289         if (contig->qStrand == contig->tStrand)
00290             {
00291             if (contig->tOffset < cluster->tEndOffset + 50000
00292                 && contig->tOffset >= cluster->tOffset-300)
00293                 {
00294                 cluster->qEndOffset = contig->qEndOffset;
00295                 cluster->tEndOffset = contig->tEndOffset;
00296                 cluster->score += contig->score;
00297                 dlAddTail(cluster->contigs, contigNode);
00298                 return TRUE;
00299                 }
00300             }
00301         else
00302             {
00303             if ( contig->tEndOffset > cluster->tOffset - 50000
00304                 && contig->tEndOffset <= cluster->tEndOffset+300)
00305                 {
00306                 cluster->qEndOffset = contig->qEndOffset;
00307                 cluster->tOffset = contig->tOffset;
00308                 cluster->score += contig->score;
00309                 dlAddTail(cluster->contigs, contigNode);
00310                 return TRUE;
00311                 }
00312             }
00313         }
00314     }
00315 return FALSE;
00316 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct contig* mergeTwo ( struct contig a,
struct contig b 
) [static, read]

Definition at line 507 of file xenbig.c.

References forwardMergeTwo(), contig::qEnd, contig::qStart, contig::qStrand, rcContigOffsets(), contig::tEnd, contig::tStart, and contig::tStrand.

Referenced by mergeWithinCluster().

00510 {
00511 int aStart, bEnd, revOff;
00512 struct contig *c;
00513 int tStart, tEnd;
00514 int qStart, qEnd;
00515 
00516 /* Take care of easy forward case first. */
00517 if (a->qStrand == a->tStrand)
00518     return forwardMergeTwo(a,b);
00519 
00520 /* Figure out a good position to do reverse complementing of
00521  * offsets about.  */
00522 aStart = a->qStart;
00523 bEnd = b->qEnd;
00524 revOff = aStart + bEnd;
00525 
00526 /* Save target beginning and end. */
00527 tStart = b->tStart;
00528 tEnd = a->tEnd;
00529 
00530 /* Save target begin and end. */
00531 qStart = a->qStart;
00532 qEnd = b->qEnd;
00533 
00534 /* Reverse complement offsets, do merge, and reverse complement
00535  * offsets back. */
00536 rcContigOffsets(a, revOff);
00537 rcContigOffsets(b, revOff);
00538 c = forwardMergeTwo(b,a);
00539 rcContigOffsets(a, revOff);
00540 rcContigOffsets(b, revOff);
00541 if (c != NULL)
00542     {
00543     rcContigOffsets(c, revOff);
00544     c->tStart = tStart;
00545     c->tEnd = tEnd;
00546     assert(c->qStart == qStart && c->qEnd == qEnd);
00547     }
00548 return c;
00549 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void mergeWithinCluster ( struct cluster cluster  )  [static]

Definition at line 552 of file xenbig.c.

References cluster::contigs, contigsOverlap(), dlCount(), dlRemove(), freeContig(), freeMem(), dlList::head, mergeTwo(), dlNode::next, dlNode::val, and warn().

Referenced by mergeContigs().

00554 {
00555 int clusterSize = dlCount(cluster->contigs);
00556 struct dlNode *prevNode, *curNode, *nextNode;
00557 struct contig *prev, *cur, *merged;
00558 int tOverlap, qOverlap;
00559 
00560 /* uglyf("M %d %s:%d-%d %c (%d-%d) %s:%d-%d %c (%d-%d)\n",
00561     clusterSize, 
00562     cluster->query, cluster->qStart, cluster->qEnd, cluster->qStrand,
00563     cluster->qOffset, cluster->qEndOffset,
00564     cluster->target, cluster->tStart, cluster->tEnd, cluster->tStrand,
00565     cluster->tOffset, cluster->tEndOffset);
00566  */
00567 
00568 if (clusterSize < 2)
00569     return;     /* Wow was that easy! */
00570 curNode = cluster->contigs->head;
00571 cur = curNode->val;
00572 nextNode = curNode->next;
00573 
00574 for (;;)
00575     {
00576     /* Start of loop logic - store current position as previous, and
00577      * move to next, stop if at end of list. */
00578     prevNode = curNode;
00579     curNode = nextNode;
00580     if ((nextNode = curNode->next) == NULL)
00581         break;
00582     prev = cur;
00583     cur = curNode->val;    
00584 
00585     /* Figure out if current node overlaps with previous. */
00586     if (contigsOverlap(prev, cur, &qOverlap, &tOverlap))
00587         {
00588         if ((merged = mergeTwo(prev, cur)) == NULL)
00589             warn("Couldn't merge two");
00590         else
00591             {
00592             dlRemove(prevNode);
00593             freeMem(prevNode);
00594             freeContig(&cur);
00595             freeContig(&prev);
00596             curNode->val = cur = merged;
00597             }
00598         }
00599     }
00600 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int milliMatchScore ( struct contig contig  )  [static]

Definition at line 695 of file xenbig.c.

References contig::qSym, round, contig::symCount, and contig::tSym.

Referenced by mergeContigs().

00697 {
00698 int symCount = contig->symCount;
00699 char *qSym = contig->qSym;
00700 char *tSym = contig->tSym;
00701 int matchCount = 0;
00702 int i;
00703 
00704 for (i=0; i<symCount; ++i)
00705     {
00706     if (qSym[i] == tSym[i])
00707         ++matchCount;
00708     }
00709 return round( (1000.0 * matchCount) /  symCount);
00710 } 

Here is the caller graph for this function:

static void rcContigOffsets ( struct contig a,
int  revOff 
) [static]

Definition at line 484 of file xenbig.c.

References contig::qEnd, contig::qStart, contig::tEnd, and contig::tStart.

Referenced by mergeTwo().

00486 {
00487 int temp;
00488 
00489 temp = a->qStart;
00490 a->qStart = revOff - a->qEnd;
00491 a->qEnd = revOff - temp;
00492 temp = a->tStart;
00493 a->tStart = revOff - a->tEnd;
00494 a->tEnd = revOff - temp;
00495 }

Here is the caller graph for this function:

static void rcQueryOffsetsAroundSeg ( struct contig a,
int  segStart,
int  segEnd 
) [static]

Definition at line 497 of file xenbig.c.

References contig::qEnd, contig::qStart, and reverseOffset().

Referenced by xStitch().

00499 {
00500 int segSize = segEnd - segStart;
00501 int qEnd = reverseOffset(a->qStart - segStart, segSize) + segStart + 1;
00502 int qStart = reverseOffset(a->qEnd - segStart, segSize) + segStart + 1;
00503 a->qStart = qStart;
00504 a->qEnd = qEnd;
00505 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void writeContig ( struct contig contig,
FILE *  out,
boolean  compactOutput,
boolean  newFormat 
) [static]

Definition at line 103 of file xenbig.c.

References contig::hSym, maxLineSize, mustWrite(), contig::name, contig::qEnd, contig::qStart, contig::qStrand, contig::qSym, contig::query, contig::queryFile, contig::score, contig::symCount, contig::target, contig::tEnd, contig::tStart, contig::tStrand, and contig::tSym.

Referenced by finishContigs().

00106 {
00107 int i;
00108 int size = contig->symCount;
00109 #define maxLineSize 50
00110 char buf[maxLineSize+1];
00111 int lineSize;
00112 int j;
00113 int qIx = 0, tIx = 0;
00114 char *qSyms = contig->qSym;
00115 char *tSyms = contig->tSym;
00116 
00117 if (newFormat)
00118     {
00119     fprintf(out, "%s align %d.%d%% of %d %s %s:%d-%d %c %s:%d-%d %c\n",
00120         contig->name, contig->score/10, contig->score%10, size, 
00121         contig->queryFile, contig->query, contig->qStart, contig->qEnd, contig->qStrand,
00122         contig->target, contig->tStart, contig->tEnd, contig->tStrand);
00123     }
00124 else
00125     {
00126     fprintf(out, "%s align %d.%d%% of %d %s:%d-%d %c %s:%d-%d %c\n",
00127         contig->name, contig->score/10, contig->score%10, size, 
00128         contig->query, contig->qStart, contig->qEnd, contig->qStrand,
00129         contig->target, contig->tStart, contig->tEnd, contig->tStrand);
00130     }
00131 if (compactOutput)
00132     {
00133     mustWrite(out, contig->qSym, size);
00134     fputc('\n', out);
00135     mustWrite(out, contig->tSym, size);
00136     fputc('\n', out);
00137     mustWrite(out, contig->hSym, size);
00138     fputc('\n', out);
00139     }
00140 else
00141     {
00142     for (i=0; i<size; i += lineSize)
00143         {
00144         /* Figure out how long this line is going to be. */
00145         lineSize = size - i;
00146         if (lineSize > maxLineSize)
00147             lineSize = maxLineSize;
00148         buf[lineSize] = 0;
00149     
00150         /* Print query symbols */
00151         memcpy(buf, contig->qSym + i, lineSize);
00152         fprintf(out, "%5d %s\n", qIx, buf);
00153 
00154         /* Print matching symbols */
00155         fprintf(out, "%5s ", "");
00156         for (j=0; j<lineSize; ++j)
00157             {
00158             int off = j+i;
00159             char q = qSyms[off];
00160             char t = tSyms[off];
00161             char c = (q == t ? '|' : ' ');
00162             fputc(c, out);
00163             }
00164         fputc('\n', out);
00165 
00166         /* Print target symbols */
00167         memcpy(buf, contig->tSym + i, lineSize);
00168         fprintf(out, "%5d %s\n", tIx, buf);
00169 
00170         /* Print hidden symbols */
00171         memcpy(buf, contig->hSym + i, lineSize);
00172         fprintf(out, "%5s %s\n", "", buf);
00173 
00174         /* Figure out how far we have advanced */
00175         for (j=0; j<lineSize; ++j)
00176             {
00177             int off = j+i;
00178             char q = qSyms[off];
00179             char t = tSyms[off];
00180             if (q != '-')
00181                 ++qIx;
00182             if (t != '-')
00183                 ++tIx;
00184             }
00185         /* Print blank line */
00186         fputc('\n', out);
00187         }
00188     }
00189 #undef lineSize
00190 }

Here is the call graph for this function:

Here is the caller graph for this function:

void xenAlignBig ( DNA query,
int  qSize,
DNA target,
int  tSize,
FILE *  f,
boolean  forHtml 
)

Definition at line 1055 of file xenbig.c.

References crudeAliFind(), crudeAli::end, FALSE, tempName::forCgi, freeNt4(), makeTempName(), mustOpen(), newNt4(), crudeAli::next, reverseComplement(), slFreeList(), crudeAli::start, crudeAli::strand, xenAlignSmall(), and xenStitcher().

01057 {
01058 struct crudeAli *caList, *ca;
01059 struct nt4Seq *nt4 = newNt4(target, tSize, "target");
01060 struct tempName dynTn;
01061 int i;
01062 int lqSize;
01063 int qMaxSize = 2000;
01064 int tMaxSize = 2*qMaxSize;
01065 int lastQsection = qSize - qMaxSize/2;
01066 FILE *dynFile;
01067 int tMin, tMax, tMid;
01068 int score;
01069 int totalAli = 0;
01070 double totalSize;
01071 
01072 totalSize = (double)qSize * tSize;
01073 if (totalSize < 10000000.0)
01074     {
01075     xenAlignSmall(query, qSize, target, tSize, f, forHtml);
01076     return;
01077     }
01078 makeTempName(&dynTn, "xen", ".dyn");
01079 dynFile = mustOpen(dynTn.forCgi, "w");
01080 
01081 for (i = 0; i<lastQsection || i == 0; i += qMaxSize/2)
01082     {
01083     lqSize = qSize - i;
01084     if (lqSize > qMaxSize)
01085         lqSize = qMaxSize;
01086     caList = crudeAliFind(query+i, lqSize, &nt4, 1, 8, 12);
01087     if (forHtml)
01088         {
01089         fputc('.', stdout);
01090         fflush(stdout);
01091         }
01092     for (ca = caList; ca != NULL; ca = ca->next)
01093         {
01094         tMid = (ca->start + ca->end)/2;
01095         tMin = tMid-tMaxSize/2;
01096         tMax = tMin + tMaxSize;
01097         if (tMin < 0)
01098             tMin = 0;
01099         if (tMax > tSize)
01100             tMax = tSize;
01101         
01102         fprintf(dynFile, "Aligning query:%d-%d %c to target:%d-%d +\n",
01103             i, i+lqSize, ca->strand, tMin, tMax);
01104         if (ca->strand == '-')
01105             {
01106             reverseComplement(query+i, lqSize);
01107             }
01108         score = xenAlignSmall(query+i, lqSize, target+tMin, tMax-tMin, dynFile, FALSE);        
01109         if (ca->strand == '-')
01110             reverseComplement(query+i, lqSize);
01111         fprintf(dynFile, "best score %d\n", score);
01112         if (forHtml)
01113             {
01114             fputc('.', stdout);
01115             fflush(stdout);
01116             }
01117         ++totalAli;
01118         }
01119     slFreeList(&caList);
01120     }
01121 
01122 freeNt4(&nt4);
01123 fclose(dynFile);
01124 if (forHtml)
01125     {
01126     printf("\n %d alignments to stitch.\n", totalAli);
01127     }
01128 xenStitcher(dynTn.forCgi, f, 150000, FALSE);
01129 remove(dynTn.forCgi);
01130 }

Here is the call graph for this function:

void xenAlignWorm ( DNA query,
int  qSize,
FILE *  f,
boolean  forHtml 
)

Definition at line 1133 of file xenbig.c.

References chromNames, crudeAliFind(), FALSE, tempName::forCgi, freez(), makeTempName(), mustOpen(), crudeAli::next, reverseComplement(), slCat(), slCount(), slFreeList(), wormChromNames(), wormChromPart(), wormClipRangeToChrom(), wormFreeNt4Genome(), wormLoadNt4Genome(), xenAlignSmall(), and xenStitcher().

01135  {
01136 struct crudeAli *caList = NULL, *ca, *newCa;
01137 struct tempName dynTn;
01138 int i;
01139 int lqSize;
01140 int qMaxSize = 2000;
01141 int tMaxSize = 2*qMaxSize;
01142 int lastQsection = qSize - qMaxSize/2;
01143 FILE *dynFile;
01144 int tMin, tMax, tMid, tSize;
01145 int score;
01146 int chromCount;
01147 char **chromNames;
01148 struct nt4Seq **chrom;
01149 DNA *target;
01150 int sectionCount = 0;
01151 
01152 /* Get C. elegans genome and do crude alignments. */
01153 wormLoadNt4Genome(&chrom, &chromCount);
01154 wormChromNames(&chromNames, &chromCount);
01155 for (i = 0; i<lastQsection || i == 0; i += qMaxSize/2)
01156     {
01157     if (forHtml)
01158         {
01159         fputc('.', stdout);
01160         fflush(stdout);
01161         }
01162     lqSize = qSize - i;
01163     if (lqSize > qMaxSize)
01164         lqSize = qMaxSize;
01165     newCa = crudeAliFind(query+i, lqSize, chrom, chromCount, 8, 45);
01166     for (ca = newCa; ca != NULL; ca = ca->next)
01167         {
01168         ca->qStart = i;
01169         ca->qEnd = i+lqSize;
01170         }
01171     caList = slCat(caList,newCa);
01172     ++sectionCount;
01173     }
01174 wormFreeNt4Genome(&chrom);
01175 
01176 /* Make temporary output file. */
01177 makeTempName(&dynTn, "xen", ".dyn");
01178 dynFile = mustOpen(dynTn.forCgi, "w");
01179 
01180 /* Do dynamic programming alignment on each crude alignment. */
01181 if (forHtml)
01182     {
01183     int crudeCount = slCount(caList);
01184     if (crudeCount > 2*sectionCount)
01185         {
01186         printf("\nThis is a difficult alignment.  It looks like the query aligns with\n"
01187                "the <I>C. elegans</I> genome in %d places.  It will take about %1.0f more\n"
01188                "minutes to finish.\n", (crudeCount+sectionCount/2)/sectionCount, crudeCount*0.5);
01189         }
01190     fflush(stdout);
01191     }
01192 for (ca = caList; ca != NULL; ca = ca->next)
01193     {
01194     if (forHtml)
01195         {
01196         fputc('.', stdout);
01197         fflush(stdout);
01198         }
01199     tMid = (ca->start + ca->end)/2;
01200     tMin = tMid-tMaxSize/2;
01201     tMax = tMin + tMaxSize;
01202     wormClipRangeToChrom(chromNames[ca->chromIx], &tMin, &tMax);
01203     fprintf(dynFile, "Aligning query:%d-%d %c to %s:%d-%d +\n",
01204         ca->qStart, ca->qEnd, ca->strand, chromNames[ca->chromIx], tMin, tMax);
01205     lqSize = ca->qEnd - ca->qStart;
01206     if (ca->strand == '-')
01207         reverseComplement(query+ca->qStart, lqSize);
01208     tSize = tMax - tMin;
01209     target = wormChromPart(chromNames[ca->chromIx], tMin, tSize);
01210     score = xenAlignSmall(query+ca->qStart, lqSize, target, tSize, dynFile, FALSE);        
01211     freez(&target);
01212     if (ca->strand == '-')
01213         reverseComplement(query+ca->qStart, lqSize);
01214     fprintf(dynFile, "best score %d\n", score);
01215     }
01216 slFreeList(&caList);
01217 fclose(dynFile);
01218 
01219 if (forHtml)
01220     printf("\n");
01221 xenStitcher(dynTn.forCgi, f, 150000, FALSE);
01222 remove(dynTn.forCgi);
01223 }

Here is the call graph for this function:

void xenStitch ( char *  inName,
FILE *  out,
int  stitchMinScore,
boolean  compactOutput 
)

Definition at line 1040 of file xenbig.c.

References TRUE, and xStitch().

01043 {
01044 xStitch(inName, out, stitchMinScore, compactOutput, TRUE);
01045 }

Here is the call graph for this function:

void xenStitcher ( char *  inName,
FILE *  out,
int  stitchMinScore,
boolean  compactOutput 
)

Definition at line 1047 of file xenbig.c.

References FALSE, and xStitch().

Referenced by xenAlignBig(), and xenAlignWorm().

01050 {
01051 xStitch(inName, out, stitchMinScore, compactOutput, FALSE);
01052 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void xStitch ( char *  inName,
FILE *  out,
int  stitchMinScore,
boolean  compactOutput,
boolean  newFormat 
) [static]

Definition at line 845 of file xenbig.c.

References AllocVar, ArraySize, chopLine, chopString(), cloneString(), cloneStringZ(), countNonGap(), dlAddValTail(), errAbort(), finishContigs(), freeHash(), freeMem(), hashStoreName(), contig::hSym, contig::isComplete, max, min, mustOpen(), slName::name, needMem(), newDlList(), newHash(), newSlName(), contig::qEnd, contig::qEndOffset, contig::qOffset, contig::qStart, contig::qStrand, contig::qSym, contig::query, contig::queryFile, rcQueryOffsetsAroundSeg(), sameString, contig::score, slAddHead, slFreeList(), contig::symCount, contig::target, contig::tEnd, contig::tEndOffset, contig::tOffset, TRUE, contig::tStart, contig::tStrand, and contig::tSym.

Referenced by xenStitch(), and xenStitcher().

00849 {
00850 FILE *in;
00851 char line[512];
00852 int lineCount = 0;
00853 char *words[64];
00854 int wordCount;
00855 char *firstWord;
00856 char *queryName = "";
00857 char *queryFile = "";
00858 struct hash *queryFileHash = newHash(0);
00859 struct contig *contig = NULL;
00860 struct dlList *contigList = newDlList();
00861 int lineState = 0;   /* Keeps track of groups of four lines. */
00862 struct slName *queryNameList = NULL;
00863 int maxSymCount = 64*1024;
00864 char *qSymBuf = needMem(maxSymCount+1);
00865 char *tSymBuf = needMem(maxSymCount+1);
00866 char *hSymBuf = needMem(maxSymCount+1);
00867 int symCount = 0;
00868 int qSymLen = 0, tSymLen = 0, hSymLen = 0;
00869 
00870 in = mustOpen(inName, "r");
00871 while (fgets(line, sizeof(line), in))
00872     {
00873     ++lineCount;
00874     if (++lineState == 5)
00875         lineState = 0;
00876     wordCount = chopLine(line, words);
00877     if (wordCount <= 0)
00878         continue;
00879     firstWord = words[0];
00880     if (sameString(firstWord, "Aligning"))
00881         {
00882         char *queryString;
00883         char *targetString;
00884         char queryStrand, targetStrand;
00885         char *parts[8];
00886         int partCount;
00887 
00888         /* Do some preliminary checking of this line. */
00889         if (newFormat)
00890             {
00891             if (wordCount < 7)
00892                 errAbort("Short line %d of %s", lineCount, inName);
00893             queryFile = hashStoreName(queryFileHash, words[1]);
00894             queryString = words[2];
00895             queryStrand = words[3][0];
00896             targetString = words[5];
00897             targetStrand = words[6][0];
00898             }
00899         else
00900             {
00901             if (wordCount < 6)
00902                 errAbort("Short line %d of %s", lineCount, inName);
00903             queryString = words[1];
00904             queryStrand = words[2][0];
00905             targetString = words[4];
00906             targetStrand = words[5][0];
00907             }
00908 
00909         /* Extract the name of the query sequence.  If it's new,
00910          * then write out contigs on previous query we've accumulated
00911          * so far and start a new list. */
00912         partCount = chopString(queryString, ":-", parts, ArraySize(parts));
00913         if (!sameString(parts[0], queryName))
00914             {
00915             /* Allocate new name and keep track of it. */
00916             struct slName *newName = newSlName(parts[0]);
00917             slAddHead(&queryNameList, newName);
00918 
00919             /* Write out old contigs and empty out contig list. */
00920             finishContigs(&contigList, out, stitchMinScore, compactOutput, newFormat);
00921             queryName = newName->name;
00922             }
00923         
00924         /* Make up a new contig, and fill it in with the data we
00925          * have so far about query. */
00926         AllocVar(contig);
00927         dlAddValTail(contigList, contig);
00928         contig->queryFile = queryFile;
00929         contig->query = queryName;
00930         contig->qOffset = atoi(parts[1]);
00931         contig->qEndOffset = atoi(parts[2]);
00932         contig->qStrand = queryStrand;
00933 
00934         /* Parse target string and fill in contig with it's info. */
00935         chopString(targetString, ":-", parts, ArraySize(parts));
00936         contig->target = cloneString(parts[0]);
00937         contig->tOffset = atoi(parts[1]);
00938         contig->tEndOffset = atoi(parts[2]);
00939         contig->tStrand = targetStrand;
00940 
00941         /* We don't know start and end yet - set them to values
00942          * that will get easily replace by max/min. */
00943         contig->qStart = contig->tStart = 0x3fffffff;
00944 
00945         lineState = -1;
00946         symCount = 0;
00947         }
00948     else if (sameString(firstWord, "best"))
00949         {
00950         if (wordCount < 3)
00951             errAbort("Short line %d of %s", lineCount, inName);
00952         contig->score = atoi(words[2]);
00953         contig->isComplete = TRUE;
00954         contig->qSym = cloneStringZ(qSymBuf, symCount);
00955         contig->tSym = cloneStringZ(tSymBuf, symCount);
00956         contig->hSym = cloneStringZ(hSymBuf, symCount);
00957         contig->symCount = symCount;
00958         contig->qEnd = contig->qStart + countNonGap(qSymBuf, symCount);
00959         contig->tEnd = contig->tStart + countNonGap(tSymBuf, symCount);
00960         if (contig->qStrand != contig->tStrand) 
00961             rcQueryOffsetsAroundSeg(contig, contig->qOffset, contig->qEndOffset);
00962         }
00963     else if (wordCount > 1 && (isdigit(firstWord[0]) || firstWord[0] == '-'))
00964         {
00965         int start, end;
00966         char *sym = words[1];
00967         int symLen = strlen(sym);
00968         char firstChar = firstWord[0];
00969         if (lineState != 0 && lineState != 2)
00970             errAbort("Bummer - phasing mismatch on lineState line %d of %s!\n", lineCount, inName);
00971         assert(lineState == 0 || lineState == 2);
00972         start = atoi(firstWord);
00973         end = start + symLen;
00974         if (symCount + symLen > maxSymCount)
00975             {
00976             errAbort("Single contig too long line %d of %s, can only handle up to %d symbols\n",
00977                 lineCount, inName, maxSymCount);
00978             }
00979         if (lineState == 0) /* query symbols */
00980             {
00981             qSymLen = symLen;
00982             if (isdigit(firstChar))
00983                 {
00984                 start += contig->qOffset;
00985                 end += contig->qOffset;
00986                 contig->qStart = min(contig->qStart, start);
00987                 contig->qEnd = max(contig->qEnd, end);
00988                 }
00989             memcpy(qSymBuf+symCount, sym, symLen);
00990             }
00991         else               /* target symbols */
00992             {
00993             tSymLen = symLen;
00994             if (tSymLen != qSymLen)
00995                 {
00996                 errAbort("Target symbol size not same as query line %d of %s",
00997                     lineCount, inName);
00998                 }            
00999             if (isdigit(firstChar))
01000                 {
01001                 start += contig->tOffset;
01002                 end += contig->tOffset;
01003                 contig->tStart = min(contig->tStart, start);
01004                 }
01005             memcpy(tSymBuf+symCount, sym, symLen);
01006             }
01007         }
01008     else if (firstWord[0] == '(')
01009         {
01010         lineState = -1;
01011         }
01012     else
01013         {
01014         assert(lineState == 1 || lineState == 3);
01015         if (lineState == 3)  /* Hidden symbols. */
01016             {
01017             char *sym = firstWord;
01018             int symLen = strlen(sym);
01019             hSymLen = symLen;
01020             if (hSymLen != qSymLen)
01021                 {
01022                 errAbort("Hidden symbol size not same as query line %d of %s",
01023                     lineCount, inName);
01024                 }
01025             memcpy(hSymBuf+symCount, sym, symLen);
01026             symCount += symLen;
01027             }        
01028         }
01029     } 
01030 finishContigs(&contigList, out, stitchMinScore, compactOutput, newFormat);
01031 fclose(in);
01032 
01033 slFreeList(&queryNameList);
01034 freeHash(&queryFileHash);
01035 freeMem(qSymBuf);
01036 freeMem(tSymBuf);
01037 freeMem(hSymBuf);
01038 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

char const rcsid[] = "$Id: xenbig.c,v 1.3 2006/03/15 18:36:16 angie Exp $" [static]

Definition at line 16 of file xenbig.c.


Generated on Tue Dec 25 19:31:34 2007 for blat by  doxygen 1.5.2