lib/chainBlock.c File Reference

#include "common.h"
#include "localmem.h"
#include "linefile.h"
#include "dlist.h"
#include "chainBlock.h"

Include dependency graph for chainBlock.c:

Go to the source code of this file.

Data Structures

struct  kdBranch
struct  kdLeaf
struct  kdTree
struct  predScore

Functions

static int kdLeafCmpQ (const void *va, const void *vb)
static int kdLeafCmpT (const void *va, const void *vb)
static int kdLeafCmpTotal (const void *va, const void *vb)
static int medianVal (struct dlList *list, int medianIx, int dim)
static int splitList (struct dlList *oldList, struct dlList *newList)
static void clearHits (struct dlList *list)
static struct kdBranchkdBuild (int nodeCount, struct dlList *lists[2], int dim, struct lm *lm)
static struct kdTreekdTreeMake (struct kdLeaf *leafList, struct lm *lm)
static struct predScore bestPredecessor (struct kdLeaf *lonely, ConnectCost connectCost, GapCost gapCost, void *gapData, int dim, struct kdBranch *branch, struct predScore bestSoFar)
static void updateScoresOnWay (struct kdBranch *branch, int dim, struct kdLeaf *leaf)
static void findBestPredecessors (struct kdTree *tree, struct kdLeaf *leafList, ConnectCost connectCost, GapCost gapCost, void *gapData)
static double scoreBlocks (struct cBlock *blockList, ConnectCost connectCost, void *gapData)
static struct chainpeelChains (char *qName, int qSize, char qStrand, char *tName, int tSize, struct kdLeaf *leafList, FILE *details)
chainchainBlocks (char *qName, int qSize, char qStrand, char *tName, int tSize, struct cBlock **pBlockList, ConnectCost connectCost, GapCost gapCost, void *gapData, FILE *details)

Variables

static char const rcsid [] = "$Id: chainBlock.c,v 1.17 2005/04/10 14:41:21 markd Exp $"


Function Documentation

static struct predScore bestPredecessor ( struct kdLeaf lonely,
ConnectCost  connectCost,
GapCost  gapCost,
void *  gapData,
int  dim,
struct kdBranch branch,
struct predScore  bestSoFar 
) [static, read]

Definition at line 205 of file chainBlock.c.

References kdBranch::cutCoord, kdBranch::hi, kdBranch::leaf, kdBranch::lo, kdBranch::maxQ, kdBranch::maxScore, kdBranch::maxT, predScore::pred, and predScore::score.

Referenced by findBestPredecessors().

00216 {
00217 struct kdLeaf *leaf;
00218 double maxScore = branch->maxScore + lonely->cb->score;
00219 
00220 /* If best score in this branch of tree wouldn't be enough
00221  * don't bother exploring it. First try without calculating
00222  * gap score in case gap score is a little expensive to calculate. */
00223 if (maxScore < bestSoFar.score)
00224     return bestSoFar;
00225 maxScore -= gapCost(lonely->cb->qStart - branch->maxQ, 
00226         lonely->cb->tStart - branch->maxT, gapData);
00227 if (maxScore < bestSoFar.score)
00228     return bestSoFar;
00229 
00230 
00231 /* If it's a terminal branch, then calculate score to connect
00232  * with it. */
00233 else if ((leaf = branch->leaf) != NULL)
00234     {
00235     if (leaf->cb->qStart < lonely->cb->qStart 
00236      && leaf->cb->tStart < lonely->cb->tStart)
00237         {
00238         double score = leaf->totalScore + lonely->cb->score - 
00239                 connectCost(leaf->cb, lonely->cb, gapData);
00240         if (score > bestSoFar.score)
00241            {
00242            bestSoFar.score = score;
00243            bestSoFar.pred = branch;
00244            }
00245         }
00246     return bestSoFar;
00247     }
00248 
00249 /* Otherwise explore sub-trees that could harbor predecessors. */
00250 else
00251     {
00252     int newDim = 1-dim;
00253     int dimCoord = (dim == 0 ? lonely->cb->qStart : lonely->cb->tStart);
00254     
00255     /* Explore hi branch first as it is more likely to have high
00256      * scores.  However only explore it if it can have things starting
00257      * before us. */
00258     if (dimCoord > branch->cutCoord)
00259          bestSoFar = bestPredecessor(lonely, connectCost, gapCost, gapData,
00260                 newDim, branch->hi, bestSoFar);
00261     bestSoFar = bestPredecessor(lonely, connectCost, gapCost, gapData,
00262         newDim, branch->lo, bestSoFar);
00263     return bestSoFar;
00264     }
00265 }

Here is the caller graph for this function:

struct chain* chainBlocks ( char *  qName,
int  qSize,
char  qStrand,
char *  tName,
int  tSize,
struct cBlock **  pBlockList,
ConnectCost  connectCost,
GapCost  gapCost,
void *  gapData,
FILE *  details 
) [read]

Definition at line 392 of file chainBlock.c.

References chainCmpScore(), findBestPredecessors(), kdLeafCmpT(), kdLeafCmpTotal(), kdTreeMake(), lm, lmAllocVar, lmCleanup(), lmInit(), chain::next, cBlock::next, peelChains(), cBlock::score, scoreBlocks(), slAddHead, slSort(), cBlock::tEnd, and cBlock::tStart.

Referenced by ssFindBestBig().

00412 {
00413 struct kdTree *tree;
00414 struct kdLeaf *leafList = NULL, *leaf;
00415 struct cBlock *block;
00416 struct chain *chainList = NULL, *chain;
00417 struct lm *lm;
00418 
00419 /* Empty lists will be problematic later, so deal with them here. */
00420 if (*pBlockList == NULL)
00421    return NULL;
00422 
00423 /* Make a leaf for each block. */
00424 lm = lmInit(0);  /* Memory for tree, branches and leaves. */
00425 for (block = *pBlockList; block != NULL; block = block->next)
00426     {
00427     /* Watch out for 0-length blocks in input: */
00428     if (block->tStart == block->tEnd)
00429         continue;
00430     lmAllocVar(lm, leaf);
00431     leaf->cb = block;
00432     leaf->totalScore = block->score;
00433     slAddHead(&leafList, leaf);
00434     }
00435 
00436 /* Figure out chains. */
00437 slSort(&leafList, kdLeafCmpT);
00438 tree = kdTreeMake(leafList, lm);
00439 findBestPredecessors(tree, leafList, connectCost, gapCost, gapData);
00440 slSort(&leafList, kdLeafCmpTotal);
00441 chainList = peelChains(qName, qSize, qStrand, tName, tSize, leafList, details);
00442 
00443 /* Rescore chains (since some truncated) */
00444 for (chain = chainList; chain != NULL; chain = chain->next)
00445     chain->score = scoreBlocks(chain->blockList, connectCost, gapData);
00446 slSort(&chainList,  chainCmpScore);
00447 
00448 /* Clean up and go home. */
00449 lmCleanup(&lm);
00450 *pBlockList = NULL;
00451 return chainList;
00452 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void clearHits ( struct dlList list  )  [static]

Definition at line 110 of file chainBlock.c.

References dlEnd, FALSE, dlList::head, dlNode::next, and dlNode::val.

Referenced by kdBuild().

00112 {
00113 struct dlNode *node;
00114 for (node = list->head; !dlEnd(node); node = node->next)
00115     {
00116     struct kdLeaf *leaf = node->val;
00117     leaf->hit = FALSE;
00118     }
00119 }

Here is the caller graph for this function:

static void findBestPredecessors ( struct kdTree tree,
struct kdLeaf leafList,
ConnectCost  connectCost,
GapCost  gapCost,
void *  gapData 
) [static]

Definition at line 284 of file chainBlock.c.

References kdLeaf::bestPred, bestPredecessor(), kdLeaf::next, kdTree::root, kdLeaf::totalScore, and updateScoresOnWay().

Referenced by chainBlocks().

00287 {
00288 static struct predScore noBest;
00289 struct kdLeaf *leaf;
00290 struct kdLeaf *bestLeaf = NULL;
00291 double bestScore = 0;
00292 
00293 for (leaf = leafList; leaf != NULL; leaf = leaf->next)
00294     {
00295     struct predScore best;
00296     best = bestPredecessor(leaf, connectCost, gapCost, gapData, 0, tree->root, noBest);
00297     if (best.score > leaf->totalScore)
00298         {
00299         leaf->totalScore = best.score;
00300         leaf->bestPred = best.pred;
00301         }
00302     updateScoresOnWay(tree->root, 0, leaf);
00303     if (bestScore < leaf->totalScore)
00304         {
00305         bestScore = leaf->totalScore;
00306         bestLeaf = leaf;
00307         }
00308     }
00309 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct kdBranch* kdBuild ( int  nodeCount,
struct dlList lists[2],
int  dim,
struct lm lm 
) [static, read]

Definition at line 122 of file chainBlock.c.

References kdLeaf::cb, clearHits(), kdBranch::cutCoord, dlList::head, kdBranch::hi, kdBranch::leaf, lm, lmAllocVar, kdBranch::lo, max, kdBranch::maxQ, kdBranch::maxT, medianVal(), cBlock::qEnd, splitList(), cBlock::tEnd, and dlNode::val.

Referenced by kdTreeMake().

00125 {
00126 struct kdBranch *branch;
00127 lmAllocVar(lm, branch);
00128 if (nodeCount == 1)
00129     {
00130     struct kdLeaf *leaf = lists[0]->head->val;
00131     branch->leaf = leaf;
00132     branch->maxQ = leaf->cb->qEnd;
00133     branch->maxT = leaf->cb->tEnd;
00134     }
00135 else
00136     {
00137     int newCount;
00138     struct dlList *newLists[2];
00139     struct dlList newQ, newT;
00140     int nextDim = 1-dim;
00141 
00142     /* Subdivide lists along median.  */
00143     newLists[0] = &newQ;
00144     newLists[1] = &newT;
00145     clearHits(lists[0]);
00146     branch->cutCoord = medianVal(lists[dim], nodeCount/2, dim);
00147     newCount = splitList(lists[0], newLists[0]);
00148     splitList(lists[1], newLists[1]);
00149 
00150     /* Recurse on each side. */
00151     branch->lo = kdBuild(nodeCount - newCount, lists, nextDim, lm);
00152     branch->hi = kdBuild(newCount, newLists, nextDim, lm);
00153     branch->maxQ = max(branch->lo->maxQ, branch->hi->maxQ);
00154     branch->maxT = max(branch->lo->maxT, branch->hi->maxT);
00155     }
00156 return branch;
00157 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int kdLeafCmpQ ( const void *  va,
const void *  vb 
) [static]

Definition at line 45 of file chainBlock.c.

References kdLeaf::cb, and cBlock::qStart.

Referenced by kdTreeMake().

00047 {
00048 const struct kdLeaf *a = *((struct kdLeaf **)va);
00049 const struct kdLeaf *b = *((struct kdLeaf **)vb);
00050 return a->cb->qStart - b->cb->qStart;
00051 }

Here is the caller graph for this function:

static int kdLeafCmpT ( const void *  va,
const void *  vb 
) [static]

Definition at line 53 of file chainBlock.c.

References kdLeaf::cb, and cBlock::tStart.

Referenced by chainBlocks().

00055 {
00056 const struct kdLeaf *a = *((struct kdLeaf **)va);
00057 const struct kdLeaf *b = *((struct kdLeaf **)vb);
00058 return a->cb->tStart - b->cb->tStart;
00059 }

Here is the caller graph for this function:

static int kdLeafCmpTotal ( const void *  va,
const void *  vb 
) [static]

Definition at line 61 of file chainBlock.c.

References kdLeaf::totalScore.

Referenced by chainBlocks().

00063 {
00064 const struct kdLeaf *a = *((struct kdLeaf **)va);
00065 const struct kdLeaf *b = *((struct kdLeaf **)vb);
00066 double diff = b->totalScore - a->totalScore;
00067 if (diff < 0) return -1;
00068 else if (diff > 0) return 1;
00069 else return 0;
00070 }

Here is the caller graph for this function:

static struct kdTree* kdTreeMake ( struct kdLeaf leafList,
struct lm lm 
) [static, read]

Definition at line 159 of file chainBlock.c.

References AllocArray, dlAddTail(), dlListInit(), dlSort(), freeMem(), kdBuild(), kdLeafCmpQ(), lm, lmAllocVar, kdLeaf::next, kdTree::root, slCount(), and dlNode::val.

Referenced by chainBlocks().

00161 {
00162 struct kdLeaf *leaf;
00163 int nodeCount = slCount(leafList);
00164 struct kdTree *tree;
00165 struct dlList qList, tList,*lists[2];
00166 struct dlNode *qNodes, *tNodes;
00167 int i;
00168 
00169 /* Build lists sorted in each dimension. This
00170  * will let us quickly find medians while constructing
00171  * the kd-tree. */
00172 dlListInit(&qList);
00173 dlListInit(&tList);
00174 AllocArray(qNodes, nodeCount);
00175 AllocArray(tNodes, nodeCount);
00176 for (i=0 , leaf=leafList; leaf != NULL; leaf = leaf->next, ++i)
00177     {
00178     qNodes[i].val = tNodes[i].val = leaf;
00179     dlAddTail(&qList, &qNodes[i]);
00180     dlAddTail(&tList, &tNodes[i]);
00181     }
00182 /* Just sort qList since tList is sorted because it was
00183  * constructed from sorted leafList. */
00184 dlSort(&qList, kdLeafCmpQ); 
00185 lists[0] = &qList;
00186 lists[1] = &tList;
00187 
00188 /* Allocate master data structure and call recursive builder. */
00189 lmAllocVar(lm, tree);
00190 tree->root = kdBuild(nodeCount, lists, 0, lm);
00191 
00192 /* Clean up and go home. */
00193 freeMem(qNodes);
00194 freeMem(tNodes);
00195 return tree;
00196 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int medianVal ( struct dlList list,
int  medianIx,
int  dim 
) [static]

Definition at line 72 of file chainBlock.c.

References kdLeaf::cb, dlList::head, kdLeaf::hit, dlNode::next, cBlock::qStart, TRUE, cBlock::tStart, and dlNode::val.

Referenced by kdBuild().

00075 {
00076 struct dlNode *node = list->head;
00077 struct kdLeaf *leaf = NULL;
00078 int i;
00079 
00080 for (i=0; i<medianIx; ++i)
00081     {
00082     leaf = node->val;
00083     leaf->hit = TRUE;  
00084     node = node->next;
00085     }
00086 return (dim == 0 ? leaf->cb->qStart : leaf->cb->tStart);
00087 }

Here is the caller graph for this function:

static struct chain* peelChains ( char *  qName,
int  qSize,
char  qStrand,
char *  tName,
int  tSize,
struct kdLeaf leafList,
FILE *  details 
) [static, read]

Definition at line 327 of file chainBlock.c.

References AllocVar, kdLeaf::cb, chainWriteHead(), cloneString(), FALSE, kdLeaf::hit, kdLeaf::next, cBlock::qEnd, slAddHead, cBlock::tEnd, kdLeaf::totalScore, and TRUE.

Referenced by chainBlocks().

00331 {
00332 struct kdLeaf *leaf;
00333 struct chain *chainList = NULL, *chain;
00334 for (leaf = leafList; leaf != NULL; leaf = leaf->next)
00335     leaf->hit = FALSE;
00336 for (leaf = leafList; leaf != NULL; leaf = leaf->next)
00337     {
00338     if (!leaf->hit)
00339         {
00340         struct kdLeaf *lf;
00341         AllocVar(chain);
00342         chain->qName = cloneString(qName);
00343         chain->qSize = qSize;
00344         chain->qStrand = qStrand;
00345         chain->tName = cloneString(tName);
00346         chain->tSize = tSize;
00347         chain->qEnd = leaf->cb->qEnd;
00348         chain->tEnd = leaf->cb->tEnd;
00349         if (details)
00350             {
00351             chain->score = leaf->totalScore;
00352             chain->id = -1;
00353             chainWriteHead(chain, details);
00354             chain->score = 0;
00355             chain->id = 0;
00356             }
00357         slAddHead(&chainList, chain);
00358         for (lf = leaf; ; )
00359             {
00360             lf->hit = TRUE;
00361             slAddHead(&chain->blockList, lf->cb);
00362             chain->qStart = lf->cb->qStart;
00363             chain->tStart = lf->cb->tStart;
00364             if (details)
00365                 {
00366                 struct cBlock *b = lf->cb;
00367                 fprintf(details, "%d\t%f\t%d\t%d\t%d\n", b->score, lf->totalScore,
00368                         b->tStart, b->qStart, b->qEnd - b->qStart);
00369                 }
00370             if (lf->bestPred == NULL)
00371                  break;
00372             else
00373                 {
00374                 if (details)
00375                     {
00376                     struct cBlock *b = lf->cb;
00377                     struct cBlock *a = lf->bestPred->leaf->cb;
00378                     fprintf(details, " gap %d\t%d\n", 
00379                         b->tStart - a->tEnd, b->qStart - a->qEnd);
00380                     }
00381                 }
00382             lf = lf->bestPred->leaf;
00383             if (lf->hit)
00384                 break;
00385             }
00386         }
00387     }
00388 slReverse(&chainList);
00389 return chainList;
00390 }

Here is the call graph for this function:

Here is the caller graph for this function:

static double scoreBlocks ( struct cBlock blockList,
ConnectCost  connectCost,
void *  gapData 
) [static]

Definition at line 311 of file chainBlock.c.

References cBlock::next, and cBlock::score.

Referenced by chainBlocks().

00314 {
00315 struct cBlock *block, *lastBlock = NULL;
00316 double score = 0;
00317 for (block = blockList; block != NULL; block = block->next)
00318     {
00319     score += block->score;
00320     if (lastBlock != NULL)
00321         score -= connectCost(lastBlock, block, gapData);
00322     lastBlock = block;
00323     }
00324 return score;
00325 }

Here is the caller graph for this function:

static int splitList ( struct dlList oldList,
struct dlList newList 
) [static]

Definition at line 89 of file chainBlock.c.

References dlAddTail(), dlEnd, dlListInit(), dlRemove(), dlList::head, kdLeaf::hit, kdLeaf::next, dlNode::next, and dlNode::val.

Referenced by kdBuild().

00091 {
00092 struct dlNode *node, *next;
00093 struct kdLeaf *leaf;
00094 int newCount = 0;
00095 dlListInit(newList);
00096 for (node = oldList->head; !dlEnd(node); node = next)
00097     {
00098     next = node->next;
00099     leaf = node->val;
00100     if (!leaf->hit)
00101         {
00102         dlRemove(node);
00103         dlAddTail(newList, node);
00104         ++newCount;
00105         }
00106     }
00107 return newCount;
00108 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void updateScoresOnWay ( struct kdBranch branch,
int  dim,
struct kdLeaf leaf 
) [static]

Definition at line 267 of file chainBlock.c.

References kdLeaf::cb, kdBranch::cutCoord, kdBranch::hi, kdBranch::leaf, kdBranch::lo, kdBranch::maxScore, cBlock::qStart, kdLeaf::totalScore, and cBlock::tStart.

Referenced by findBestPredecessors().

00271 {
00272 int newDim = 1-dim;
00273 int dimCoord = (dim == 0 ? leaf->cb->qStart : leaf->cb->tStart);
00274 if (branch->maxScore < leaf->totalScore) branch->maxScore = leaf->totalScore;
00275 if (branch->leaf == NULL)
00276     {
00277     if (dimCoord <= branch->cutCoord)
00278         updateScoresOnWay(branch->lo, newDim, leaf);
00279     if (dimCoord >= branch->cutCoord)
00280         updateScoresOnWay(branch->hi, newDim, leaf);
00281     }
00282 }

Here is the caller graph for this function:


Variable Documentation

char const rcsid[] = "$Id: chainBlock.c,v 1.17 2005/04/10 14:41:21 markd Exp $" [static]

Definition at line 12 of file chainBlock.c.


Generated on Tue Dec 25 19:37:24 2007 for blat by  doxygen 1.5.2