lib/boxClump.c

Go to the documentation of this file.
00001 /* boxClump - put together 2 dimensional boxes that
00002  * overlap with each other into clumps. */
00003 
00004 #include "common.h"
00005 #include "dlist.h"
00006 #include "localmem.h"
00007 #include "boxClump.h"
00008 
00009 static char const rcsid[] = "$Id: boxClump.c,v 1.5 2005/01/10 00:11:34 kent Exp $";
00010 
00015 void boxClumpFree(struct boxClump **pClump)
00016 /* Free boxClump. */
00017 {
00018 struct boxClump *clump = *pClump;
00019 if (clump != NULL)
00020     {
00021     slFreeList(&clump->boxList);
00022     freez(pClump);
00023     }
00024 }
00025 
00026 void boxClumpFreeList(struct boxClump **pList)
00027 /* Free list of boxClumps. */
00028 {
00029 struct boxClump *el, *next;
00030 
00031 for (el = *pList; el != NULL; el = next)
00032     {
00033     next = el->next;
00034     boxClumpFree(&el);
00035     }
00036 *pList = NULL;
00037 }
00038 
00039 int boxClumpCmpCount(const void *va, const void *vb)
00040 /* Compare to sort based on count of boxes. */
00041 {
00042 const struct boxClump *a = *((struct boxClump **)va);
00043 const struct boxClump *b = *((struct boxClump **)vb);
00044 return b->boxCount - a->boxCount;
00045 }
00046 
00049 struct cluster
00050 /* A cluster of boxes. */
00051     {
00052     struct box *boxList;        /* List of boxes. */
00053     struct dlNode *node;        /* Position in doubly-linked list. */
00054     };
00055 
00056 struct box
00057 /* A box in an alignment. */
00058     {
00059     struct box *next;            /* Next in list (usually on cluster). */
00060     struct boxIn *in;            /* Input box position. */
00061     struct cluster *cluster;     /* Cluster this is part of. */
00062     };
00063 
00064 struct boxRef
00065 /* Reference to a box. */
00066     {
00067     struct boxRef *next;
00068     struct box *box;
00069     };
00070 
00071 
00076 static void mergeClusters(struct cluster *a, struct cluster *b)
00077 /* Merge cluster b into cluster a, and free remnants of b. */
00078 {
00079 struct box *box;
00080 
00081 /* Merging with yourself is always problematic. */
00082 if (a == b)
00083     return;
00084 
00085 /* Point all boxes in b to a. */
00086 for (box = b->boxList; box != NULL; box = box->next)
00087     box->cluster = a;
00088 
00089 /* Add b's boxList to end of a's. */
00090 a->boxList = slCat(a->boxList, b->boxList);
00091 
00092 /* Remove b from master cluster list. */
00093 dlRemove(b->node);
00094 }
00095 
00096 static boolean allStartBy(struct boxRef *refList, int qStart, int tStart)
00097 /* Return TRUE if qStart/qEnd of all boxes less than or equal to qStart/tStart 
00098  * This is an end condition for recursion along with only having two or
00099  * less boxes in the refList.  It handles the case where you have many
00100  * boxes stacked on top of each other. */
00101 {
00102 struct boxRef *ref;
00103 for (ref = refList; ref != NULL; ref = ref->next)
00104     {
00105     struct boxIn *box = ref->box->in;
00106     if (box->qStart > qStart || box->tStart > tStart)
00107         return FALSE;
00108     }
00109 return TRUE;
00110 }
00111 
00112 static boolean allSameCluster(struct boxRef *refList)
00113 /* Return TRUE if qStart/qEnd of all boxes is the same */
00114 {
00115 struct boxRef *ref;
00116 struct cluster *cluster = refList->box->cluster;
00117 for (ref = refList->next; ref != NULL; ref = ref->next)
00118     {
00119     if (ref->box->cluster != cluster)
00120         return FALSE;
00121     }
00122 return TRUE;
00123 }
00124 
00125 static struct lm *lm;
00126 /* Local memory manager used for boxRefs and other stuff. */
00127 
00128 static void rBoxJoin(struct boxRef *refList, 
00129         int qStart, int qEnd, int tStart, int tEnd)
00130 /* Recursively cluster boxes. */
00131 {
00132 int boxCount = slCount(refList);
00133 
00134 if (boxCount <= 1)
00135     {
00136     /* Easy: no merging required. */
00137     }
00138 else if (boxCount == 2)
00139     {
00140     /* Decide if pair overlaps and if so merge. */
00141     struct box *a = refList->box;
00142     struct box *b = refList->next->box;
00143     if (rangeIntersection(a->in->qStart, a->in->qEnd, b->in->qStart, b->in->qEnd) > 0 &&
00144         rangeIntersection(a->in->tStart, a->in->tEnd, b->in->tStart, b->in->tEnd) > 0 )
00145         {
00146         mergeClusters(a->cluster, b->cluster);
00147         }
00148     else
00149         {
00150         /* Two non-overlapping boxes, we don't have to do anything. */
00151         }
00152     }
00153 else if (allStartBy(refList, qStart, tStart))
00154     {
00155     /* If everybody contains the upper left corner, then they all can 
00156      * be merged.   This is the route taken often by clumps with lots
00157      * of overlap. */
00158     struct cluster *aCluster = refList->box->cluster;
00159     struct boxRef *ref;
00160     for (ref = refList->next; ref != NULL; ref = ref->next)
00161         {
00162         struct cluster *bCluster = ref->box->cluster;
00163         mergeClusters(aCluster, bCluster);
00164         }
00165     }
00166 else if (allSameCluster(refList))
00167     {
00168     /* Everything is in the same cluster, no action required. */
00169     }
00170 else
00171     {
00172     /* We can't yet figure out clumping, so break
00173      * up our window in two along larger dimension and
00174      * recurse on both subwindows. */
00175     struct boxRef *list1 = NULL, *list2 = NULL, *ref, *next;
00176     if (qEnd - qStart > tEnd - tStart)
00177         {
00178         int mid = (qStart + qEnd)>>1;
00179         for (ref = refList; ref != NULL; ref = next)
00180             {
00181             struct box *box = ref->box;
00182             next = ref->next;
00183             if (box->in->qEnd <= mid)
00184                 {
00185                 slAddHead(&list1, ref);
00186                 }
00187             else if (box->in->qStart >= mid)
00188                 {
00189                 slAddHead(&list2, ref);
00190                 }
00191             else
00192                 {
00193                 /* Box crosses boundary, have to put it on both lists. */
00194                 slAddHead(&list1, ref);
00195                 lmAllocVar(lm, ref);
00196                 ref->box = box;
00197                 slAddHead(&list2, ref);
00198                 }
00199             }
00200         rBoxJoin(list1, qStart, mid, tStart, tEnd);
00201         rBoxJoin(list2, mid, qEnd, tStart, tEnd);
00202         }
00203     else
00204         {
00205         int mid = (tStart + tEnd)>>1;
00206         for (ref = refList; ref != NULL; ref = next)
00207             {
00208             struct box *box = ref->box;
00209             next = ref->next;
00210             if (box->in->tEnd <= mid)
00211                 {
00212                 slAddHead(&list1, ref);
00213                 }
00214             else if (box->in->tStart >= mid)
00215                 {
00216                 slAddHead(&list2, ref);
00217                 }
00218             else
00219                 {
00220                 /* Box crosses boundary, have to put it on both lists. */
00221                 slAddHead(&list1, ref);
00222                 lmAllocVar(lm, ref);
00223                 ref->box = box;
00224                 slAddHead(&list2, ref);
00225                 }
00226             }
00227         rBoxJoin(list1, qStart, qEnd, tStart, mid);
00228         rBoxJoin(list2, qStart, qEnd, mid, tEnd);
00229         }
00230     }
00231 }
00232 
00233 
00234 struct boxClump *boxFindClumps(struct boxIn **pBoxList)
00235 /* Convert list of boxes to a list of clumps.  Clumps
00236  * are collections of boxes that overlap.  Note that
00237  * the original boxList is overwritten as the boxes
00238  * are moved from it to the clumps. */
00239 {
00240 struct boxIn *in;
00241 struct boxClump *clumpList = NULL, *clump;
00242 struct boxRef *refList = NULL, *ref;
00243 struct box *box;
00244 struct cluster *cluster;
00245 struct dlNode *node;
00246 struct dlList clusterList;
00247 int qStart = BIGNUM, qEnd = -BIGNUM;
00248 int tStart = BIGNUM, tEnd = -BIGNUM;
00249 
00250 
00251 dlListInit(&clusterList);
00252 lm = lmInit(0);
00253 
00254 /* Make local box structure, cluster, and reference
00255  * for each input box.  Calculate overall bounds. */
00256 for (in = *pBoxList; in != NULL; in = in->next)
00257     {
00258     lmAllocVar(lm, box);
00259     box->in = in;
00260     if (in->qStart < qStart) qStart = in->qStart;
00261     if (in->qEnd > qEnd) qEnd = in->qEnd;
00262     if (in->tStart < tStart) tStart = in->tStart;
00263     if (in->tEnd > tEnd) tEnd = in->tEnd;
00264     lmAllocVar(lm,cluster);
00265     lmAllocVar(lm, cluster->node);
00266     cluster->node->val = cluster;
00267     dlAddTail(&clusterList, cluster->node);
00268     cluster->boxList = box;
00269     box->cluster = cluster;
00270     lmAllocVar(lm, ref);
00271     ref->box = box;
00272     slAddHead(&refList, ref);
00273     }
00274 
00275 /* Call to recursive joiner. */
00276 rBoxJoin(refList, qStart, qEnd, tStart, tEnd);
00277 
00278 /* Copy from local memory and local data structures
00279  * to global memory and global data structures. */
00280 for (node = clusterList.head; !dlEnd(node); node = node->next)
00281     {
00282     int boxCount = 0;
00283     int qStart = BIGNUM, qEnd = -BIGNUM;
00284     int tStart = BIGNUM, tEnd = -BIGNUM;
00285     struct boxIn *boxList = NULL;
00286     cluster = node->val;
00287     for (box = cluster->boxList; box != NULL; box = box->next)
00288         {
00289         in = box->in;
00290         slAddHead(&boxList, in);
00291         if (in->qStart < qStart) qStart = in->qStart;
00292         if (in->qEnd > qEnd) qEnd = in->qEnd;
00293         if (in->tStart < tStart) tStart = in->tStart;
00294         if (in->tEnd > tEnd) tEnd = in->tEnd;
00295         ++boxCount;
00296         }
00297     if (boxCount > 0)
00298         {
00299         AllocVar(clump);
00300         slAddHead(&clumpList, clump);
00301         clump->boxList = boxList;
00302         clump->boxCount = boxCount;
00303         clump->qStart = qStart;
00304         clump->qEnd = qEnd;
00305         clump->tStart = tStart;
00306         clump->tEnd = tEnd;
00307         }
00308     }
00309 *pBoxList = NULL;
00310 lmCleanup(&lm);
00311 return clumpList;
00312 }
00313 

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2