lib/boxLump.c

Go to the documentation of this file.
00001 /* boxLump - This will lump together boxes that overlap into the smallest
00002  * box that encompasses the overlap.  It will put other boxes that 
00003  * fit in the encompassing box in there too. 
00004  *   It works by projecting the box list along one dimension at a
00005  * time looking for gaps between boxes. This is similar in function
00006  * to boxFindClumps, but a bit less precise, and quite a bit faster.
00007  * in some important cases. */
00008 
00009 #include "common.h"
00010 #include "hash.h"
00011 #include "boxClump.h"
00012 #include "boxLump.h"
00013 
00014 int boxInCmpQuery(const void *va, const void *vb)
00015 /* Compare to sort based on query start. */
00016 {
00017 const struct boxIn *a = *((struct boxIn **)va);
00018 const struct boxIn *b = *((struct boxIn **)vb);
00019 return a->qStart - b->qStart;
00020 }
00021 
00022 int boxInCmpTarget(const void *va, const void *vb)
00023 /* Compare to sort based on query start. */
00024 {
00025 const struct boxIn *a = *((struct boxIn **)va);
00026 const struct boxIn *b = *((struct boxIn **)vb);
00027 return a->tStart - b->tStart;
00028 }
00029 
00030 struct boxClump *lumpOneDimension(struct boxIn *boxList, boolean onQuery)
00031 /* Build box clump list on one dimension. */
00032 {
00033 struct boxClump *clump = NULL, *clumpList = NULL;
00034 struct boxIn *box, *nextBox;
00035 if (onQuery)
00036     slSort(&boxList, boxInCmpQuery);
00037 else
00038     slSort(&boxList, boxInCmpTarget);
00039 for (box = boxList; box != NULL; box = nextBox)
00040     {
00041     nextBox = box->next;
00042     /* Make new clump containing current box. */
00043     if (clump == NULL || 
00044         (onQuery && clump->qEnd < box->qStart) ||
00045         (!onQuery && clump->tEnd < box->tStart) )
00046         {
00047         AllocVar(clump);
00048         slAddHead(&clumpList, clump);
00049         clump->qStart = box->qStart;
00050         clump->qEnd = box->qEnd;
00051         clump->tStart = box->tStart;
00052         clump->tEnd = box->tEnd;
00053         clump->boxCount = 1;
00054         clump->boxList = box;
00055         box->next = NULL;
00056         }
00057     else
00058         {
00059         if (clump->tStart > box->tStart)
00060              clump->tStart = box->tStart;
00061         if (clump->tEnd < box->tEnd)
00062              clump->tEnd = box->tEnd;
00063         if (clump->qEnd < box->qEnd)
00064              clump->qEnd = box->qEnd;
00065         if (clump->qStart > box->qStart)
00066              clump->qStart = box->qStart;
00067         clump->boxCount += 1;
00068         slAddHead(&clump->boxList, box);
00069         }
00070     }
00071 return clumpList;
00072 }
00073 
00074 struct boxClump *boxLump(struct boxIn **pBoxList)
00075 /* Convert list of boxes to a list of lumps.  The lumps
00076  * are a smaller number of boxes that between them contain
00077  * all of the input boxes.  Note that
00078  * the original boxList is overwritten as the boxes
00079  * are moved from it to the lumps. */
00080 {
00081 struct boxClump *qClumpList = NULL, *tClumpList = NULL, 
00082         *tClump;
00083 
00084 if (*pBoxList == NULL)
00085     return NULL;
00086 tClumpList = lumpOneDimension(*pBoxList, FALSE);
00087 
00088 for (tClump = tClumpList; tClump != NULL; tClump = tClump->next)
00089     {
00090     struct boxClump *oneList = lumpOneDimension(tClump->boxList, TRUE);
00091     if (slCount(oneList) > 1)
00092         {
00093         struct boxClump *clump;
00094         for (clump = oneList; clump != NULL; clump = clump->next)
00095             {
00096             struct boxClump *subList = boxLump(&clump->boxList);
00097             qClumpList = slCat(subList, qClumpList);
00098             }
00099         boxClumpFreeList(&oneList);
00100         }
00101     else
00102         {
00103         qClumpList = slCat(oneList, qClumpList);
00104         }
00105     tClump->boxList = NULL;
00106     }
00107 
00108 boxClumpFreeList(&tClumpList);
00109 *pBoxList = NULL;
00110 return qClumpList;
00111 }
00112 
00113 #ifdef DEBUG
00114 
00115 int testData[][4] = 
00116     {
00117         /* qStart, qEnd, tStart, tEnd */
00118         {0, 100,     0, 100},
00119         {50, 150,    50, 150},
00120         {200, 300,   200, 300},
00121         {250, 350,   250, 350},
00122         {0, 100,     200, 300},
00123         {50, 150,    250, 350},
00124         {200,300,    0, 100},
00125         {250,350,    50, 150},
00126         {500,600,    100,300},
00127         {500,600,    500, 600},
00128         {500,700,    500, 700},
00129         {1000,1100,  1000,1100},
00130         {1000,1100,  2000,2100},
00131     };
00132 
00133 
00134 void testBoxLump()
00135 /* Test boxLump routine. */
00136 {
00137 struct boxIn *boxList = NULL, *box;
00138 struct boxClump *clumpList, *clump;
00139 int i;
00140 
00141 for (i=0; i<ArraySize(testData); ++i)
00142     {
00143     AllocVar(box);
00144     box->qStart = testData[i][0];
00145     box->qEnd = testData[i][1];
00146     box->tStart = testData[i][2];
00147     box->tEnd = testData[i][3];
00148     slAddHead(&boxList, box);
00149     }
00150 clumpList = boxLump(&boxList);
00151 for (clump = clumpList; clump != NULL; clump = clump->next)
00152     {
00153     printf("%d,%d,\t%d,%d\n", clump->qStart, clump->qEnd, clump->tStart, clump->tEnd);
00154     for (box = clump->boxList; box != NULL; box = box->next)
00155         printf("\t%d,%d\t%d,%d\n", box->qStart, box->qEnd, box->tStart, box->tEnd);
00156     }
00157 }
00158 #endif /* DEBUG */

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2