lib/boxClump.c File Reference

#include "common.h"
#include "dlist.h"
#include "localmem.h"
#include "boxClump.h"

Include dependency graph for boxClump.c:

Go to the source code of this file.

Data Structures

struct  cluster
struct  box
struct  boxRef

Functions

void boxClumpFree (struct boxClump **pClump)
void boxClumpFreeList (struct boxClump **pList)
int boxClumpCmpCount (const void *va, const void *vb)
static void mergeClusters (struct cluster *a, struct cluster *b)
static boolean allStartBy (struct boxRef *refList, int qStart, int tStart)
static boolean allSameCluster (struct boxRef *refList)
static void rBoxJoin (struct boxRef *refList, int qStart, int qEnd, int tStart, int tEnd)
boxClumpboxFindClumps (struct boxIn **pBoxList)

Variables

static char const rcsid [] = "$Id: boxClump.c,v 1.5 2005/01/10 00:11:34 kent Exp $"
static struct lmlm


Function Documentation

static boolean allSameCluster ( struct boxRef refList  )  [static]

Definition at line 112 of file boxClump.c.

References boxRef::box, box::cluster, FALSE, boxRef::next, and TRUE.

Referenced by rBoxJoin().

00114 {
00115 struct boxRef *ref;
00116 struct cluster *cluster = refList->box->cluster;
00117 for (ref = refList->next; ref != NULL; ref = ref->next)
00118     {
00119     if (ref->box->cluster != cluster)
00120         return FALSE;
00121     }
00122 return TRUE;
00123 }

Here is the caller graph for this function:

static boolean allStartBy ( struct boxRef refList,
int  qStart,
int  tStart 
) [static]

Definition at line 96 of file boxClump.c.

References boxRef::box, FALSE, box::in, boxRef::next, and TRUE.

Referenced by rBoxJoin().

00101 {
00102 struct boxRef *ref;
00103 for (ref = refList; ref != NULL; ref = ref->next)
00104     {
00105     struct boxIn *box = ref->box->in;
00106     if (box->qStart > qStart || box->tStart > tStart)
00107         return FALSE;
00108     }
00109 return TRUE;
00110 }

Here is the caller graph for this function:

int boxClumpCmpCount ( const void *  va,
const void *  vb 
)

Definition at line 39 of file boxClump.c.

References boxClump::boxCount.

00041 {
00042 const struct boxClump *a = *((struct boxClump **)va);
00043 const struct boxClump *b = *((struct boxClump **)vb);
00044 return b->boxCount - a->boxCount;
00045 }

void boxClumpFree ( struct boxClump **  pClump  ) 

Some simple utility function on globally declared data structures.

Definition at line 15 of file boxClump.c.

References boxClump::boxList, freez(), and slFreeList().

Referenced by boxClumpFreeList().

00017 {
00018 struct boxClump *clump = *pClump;
00019 if (clump != NULL)
00020     {
00021     slFreeList(&clump->boxList);
00022     freez(pClump);
00023     }
00024 }

Here is the call graph for this function:

Here is the caller graph for this function:

void boxClumpFreeList ( struct boxClump **  pList  ) 

Definition at line 26 of file boxClump.c.

References boxClumpFree(), and boxClump::next.

Referenced by boxLump().

00028 {
00029 struct boxClump *el, *next;
00030 
00031 for (el = *pList; el != NULL; el = next)
00032     {
00033     next = el->next;
00034     boxClumpFree(&el);
00035     }
00036 *pList = NULL;
00037 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct boxClump* boxFindClumps ( struct boxIn **  pBoxList  )  [read]

Definition at line 234 of file boxClump.c.

References AllocVar, BIGNUM, cluster::boxList, box::cluster, dlAddTail(), dlEnd, dlListInit(), dlList::head, box::in, lm, lmAllocVar, lmInit(), box::next, dlNode::next, boxIn::next, cluster::node, boxIn::qEnd, boxIn::qStart, rBoxJoin(), slAddHead, boxIn::tEnd, boxIn::tStart, and dlNode::val.

00239 {
00240 struct boxIn *in;
00241 struct boxClump *clumpList = NULL, *clump;
00242 struct boxRef *refList = NULL, *ref;
00243 struct box *box;
00244 struct cluster *cluster;
00245 struct dlNode *node;
00246 struct dlList clusterList;
00247 int qStart = BIGNUM, qEnd = -BIGNUM;
00248 int tStart = BIGNUM, tEnd = -BIGNUM;
00249 
00250 
00251 dlListInit(&clusterList);
00252 lm = lmInit(0);
00253 
00254 /* Make local box structure, cluster, and reference
00255  * for each input box.  Calculate overall bounds. */
00256 for (in = *pBoxList; in != NULL; in = in->next)
00257     {
00258     lmAllocVar(lm, box);
00259     box->in = in;
00260     if (in->qStart < qStart) qStart = in->qStart;
00261     if (in->qEnd > qEnd) qEnd = in->qEnd;
00262     if (in->tStart < tStart) tStart = in->tStart;
00263     if (in->tEnd > tEnd) tEnd = in->tEnd;
00264     lmAllocVar(lm,cluster);
00265     lmAllocVar(lm, cluster->node);
00266     cluster->node->val = cluster;
00267     dlAddTail(&clusterList, cluster->node);
00268     cluster->boxList = box;
00269     box->cluster = cluster;
00270     lmAllocVar(lm, ref);
00271     ref->box = box;
00272     slAddHead(&refList, ref);
00273     }
00274 
00275 /* Call to recursive joiner. */
00276 rBoxJoin(refList, qStart, qEnd, tStart, tEnd);
00277 
00278 /* Copy from local memory and local data structures
00279  * to global memory and global data structures. */
00280 for (node = clusterList.head; !dlEnd(node); node = node->next)
00281     {
00282     int boxCount = 0;
00283     int qStart = BIGNUM, qEnd = -BIGNUM;
00284     int tStart = BIGNUM, tEnd = -BIGNUM;
00285     struct boxIn *boxList = NULL;
00286     cluster = node->val;
00287     for (box = cluster->boxList; box != NULL; box = box->next)
00288         {
00289         in = box->in;
00290         slAddHead(&boxList, in);
00291         if (in->qStart < qStart) qStart = in->qStart;
00292         if (in->qEnd > qEnd) qEnd = in->qEnd;
00293         if (in->tStart < tStart) tStart = in->tStart;
00294         if (in->tEnd > tEnd) tEnd = in->tEnd;
00295         ++boxCount;
00296         }
00297     if (boxCount > 0)
00298         {
00299         AllocVar(clump);
00300         slAddHead(&clumpList, clump);
00301         clump->boxList = boxList;
00302         clump->boxCount = boxCount;
00303         clump->qStart = qStart;
00304         clump->qEnd = qEnd;
00305         clump->tStart = tStart;
00306         clump->tEnd = tEnd;
00307         }
00308     }
00309 *pBoxList = NULL;
00310 lmCleanup(&lm);
00311 return clumpList;
00312 }

Here is the call graph for this function:

static void mergeClusters ( struct cluster a,
struct cluster b 
) [static]

Routines that cluster using mostly the local data structures.

Definition at line 76 of file boxClump.c.

References cluster::boxList, box::cluster, dlRemove(), box::next, cluster::node, and slCat().

Referenced by rBoxJoin().

00078 {
00079 struct box *box;
00080 
00081 /* Merging with yourself is always problematic. */
00082 if (a == b)
00083     return;
00084 
00085 /* Point all boxes in b to a. */
00086 for (box = b->boxList; box != NULL; box = box->next)
00087     box->cluster = a;
00088 
00089 /* Add b's boxList to end of a's. */
00090 a->boxList = slCat(a->boxList, b->boxList);
00091 
00092 /* Remove b from master cluster list. */
00093 dlRemove(b->node);
00094 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void rBoxJoin ( struct boxRef refList,
int  qStart,
int  qEnd,
int  tStart,
int  tEnd 
) [static]

Definition at line 128 of file boxClump.c.

References allSameCluster(), allStartBy(), boxRef::box, box::cluster, box::in, lm, lmAllocVar, mergeClusters(), box::next, boxRef::next, boxIn::qEnd, boxIn::qStart, rangeIntersection(), slAddHead, slCount(), boxIn::tEnd, and boxIn::tStart.

Referenced by boxFindClumps().

00131 {
00132 int boxCount = slCount(refList);
00133 
00134 if (boxCount <= 1)
00135     {
00136     /* Easy: no merging required. */
00137     }
00138 else if (boxCount == 2)
00139     {
00140     /* Decide if pair overlaps and if so merge. */
00141     struct box *a = refList->box;
00142     struct box *b = refList->next->box;
00143     if (rangeIntersection(a->in->qStart, a->in->qEnd, b->in->qStart, b->in->qEnd) > 0 &&
00144         rangeIntersection(a->in->tStart, a->in->tEnd, b->in->tStart, b->in->tEnd) > 0 )
00145         {
00146         mergeClusters(a->cluster, b->cluster);
00147         }
00148     else
00149         {
00150         /* Two non-overlapping boxes, we don't have to do anything. */
00151         }
00152     }
00153 else if (allStartBy(refList, qStart, tStart))
00154     {
00155     /* If everybody contains the upper left corner, then they all can 
00156      * be merged.   This is the route taken often by clumps with lots
00157      * of overlap. */
00158     struct cluster *aCluster = refList->box->cluster;
00159     struct boxRef *ref;
00160     for (ref = refList->next; ref != NULL; ref = ref->next)
00161         {
00162         struct cluster *bCluster = ref->box->cluster;
00163         mergeClusters(aCluster, bCluster);
00164         }
00165     }
00166 else if (allSameCluster(refList))
00167     {
00168     /* Everything is in the same cluster, no action required. */
00169     }
00170 else
00171     {
00172     /* We can't yet figure out clumping, so break
00173      * up our window in two along larger dimension and
00174      * recurse on both subwindows. */
00175     struct boxRef *list1 = NULL, *list2 = NULL, *ref, *next;
00176     if (qEnd - qStart > tEnd - tStart)
00177         {
00178         int mid = (qStart + qEnd)>>1;
00179         for (ref = refList; ref != NULL; ref = next)
00180             {
00181             struct box *box = ref->box;
00182             next = ref->next;
00183             if (box->in->qEnd <= mid)
00184                 {
00185                 slAddHead(&list1, ref);
00186                 }
00187             else if (box->in->qStart >= mid)
00188                 {
00189                 slAddHead(&list2, ref);
00190                 }
00191             else
00192                 {
00193                 /* Box crosses boundary, have to put it on both lists. */
00194                 slAddHead(&list1, ref);
00195                 lmAllocVar(lm, ref);
00196                 ref->box = box;
00197                 slAddHead(&list2, ref);
00198                 }
00199             }
00200         rBoxJoin(list1, qStart, mid, tStart, tEnd);
00201         rBoxJoin(list2, mid, qEnd, tStart, tEnd);
00202         }
00203     else
00204         {
00205         int mid = (tStart + tEnd)>>1;
00206         for (ref = refList; ref != NULL; ref = next)
00207             {
00208             struct box *box = ref->box;
00209             next = ref->next;
00210             if (box->in->tEnd <= mid)
00211                 {
00212                 slAddHead(&list1, ref);
00213                 }
00214             else if (box->in->tStart >= mid)
00215                 {
00216                 slAddHead(&list2, ref);
00217                 }
00218             else
00219                 {
00220                 /* Box crosses boundary, have to put it on both lists. */
00221                 slAddHead(&list1, ref);
00222                 lmAllocVar(lm, ref);
00223                 ref->box = box;
00224                 slAddHead(&list2, ref);
00225                 }
00226             }
00227         rBoxJoin(list1, qStart, qEnd, tStart, mid);
00228         rBoxJoin(list2, qStart, qEnd, mid, tEnd);
00229         }
00230     }
00231 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

struct lm* lm [static]

Definition at line 125 of file boxClump.c.

Referenced by bandExt(), bandExtFf(), boxFindClumps(), chainBlocks(), cleanupMem(), dnaQuery(), ffAliFromSym(), ffFindExtendNmers(), ffSeedExtInMem(), genoFindDirect(), getHitsFromServer(), gfAlignTrans(), gfAlignTransTrans(), gfFastFindDnaHits(), gfFindAlignAaTrans(), gfFindClumps(), gfFindClumpsWithQmask(), gfFindHitsInRegion(), gfFindHitsWithQmask(), gfQuerySeqTrans(), gfQuerySeqTransTrans(), gfSegmentedFindHits(), gfSegmentedFindNearHits(), gfStraightFindHits(), gfStraightFindNearHits(), gfTransFindClumps(), gfTransTransFindBundles(), gfTransTransFindClumps(), initMem(), kdBuild(), kdTreeMake(), lmAlloc(), lmCleanup(), lmCloneMem(), lmCloneString(), lmInit(), lmSlName(), localNeedMem(), newBlock(), pslLoadLm(), pslxLoadLm(), rangeTreeNewDetailed(), rBoxJoin(), rbTreeNew(), rbTreeNewDetailed(), refineBundle(), scanIndexForSmallExons(), searchOneProt(), ssFindBestBig(), and transQuery().

char const rcsid[] = "$Id: boxClump.c,v 1.5 2005/01/10 00:11:34 kent Exp $" [static]

Definition at line 9 of file boxClump.c.


Generated on Tue Dec 25 19:35:45 2007 for blat by  doxygen 1.5.2