lib/binRange.c

Go to the documentation of this file.
00001 /* binRange Stuff to handle binning - which helps us restrict 
00002  * our attention to the parts of database that contain info
00003  * about a particular window on a chromosome. This scheme
00004  * will work without modification for chromosome sizes up
00005  * to half a gigaBase.  The finest sized bin is 128k (1<<17).
00006  * The next coarsest is 8x as big (1<<13).  There's a hierarchy
00007  * of bins with the chromosome itself being the final bin.
00008  * Features are put in the finest bin they'll fit in. 
00009  *
00010  * This file is copyright 2002 Jim Kent, but license is hereby
00011  * granted for all use - public, private or commercial. */
00012 
00013 #include "common.h"
00014 #include "binRange.h"
00015 
00016 static char const rcsid[] = "$Id: binRange.c,v 1.20 2006/07/01 05:01:25 kent Exp $";
00017 
00018 /* add one new level to get coverage past chrom sizes of 512 Mb
00019  *      effective limit is now the size of an integer since chrom start
00020  *      and end coordinates are always being used in int's == 2Gb-1 */
00021 static int binOffsetsExtended[] =
00022         {4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};
00023 
00024 static int binOffsets[] = {512+64+8+1, 64+8+1, 8+1, 1, 0};
00025 #define _binFirstShift 17       /* How much to shift to get to finest bin. */
00026 #define _binNextShift 3         /* How much to shift to get to next larger bin. */
00027 
00028 int binLevelsExtended()
00029 /* Return number of levels to bins. */
00030 {
00031 return ArraySize(binOffsetsExtended);
00032 }
00033 
00034 int binLevels()
00035 /* Return number of levels to bins. */
00036 {
00037 return ArraySize(binOffsets);
00038 }
00039 
00040 int binFirstShift()
00041 /* Return amount to shift a number to get to finest bin. */
00042 {
00043 return _binFirstShift;
00044 }
00045 
00046 int binNextShift()
00047 /* Return amount to shift a number to get to next coarser bin. */
00048 {
00049 return _binNextShift;
00050 }
00051 
00052 int binOffsetExtended(int level)
00053 /* Return offset for bins of a given level. */
00054 {
00055 assert(level >= 0 && level < ArraySize(binOffsetsExtended));
00056 return binOffsetsExtended[level] + _binOffsetOldToExtended;
00057 }
00058 
00059 int binOffset(int level)
00060 /* Return offset for bins of a given level. */
00061 {
00062 assert(level >= 0 && level < ArraySize(binOffsets));
00063 return binOffsets[level];
00064 }
00065 
00066 static int binFromRangeStandard(int start, int end)
00067 /* Given start,end in chromosome coordinates assign it
00068  * a bin.   There's a bin for each 128k segment, for each
00069  * 1M segment, for each 8M segment, for each 64M segment,
00070  * and for each chromosome (which is assumed to be less than
00071  * 512M.)  A range goes into the smallest bin it will fit in. */
00072 {
00073 int startBin = start, endBin = end-1, i;
00074 startBin >>= _binFirstShift;
00075 endBin >>= _binFirstShift;
00076 for (i=0; i<ArraySize(binOffsets); ++i)
00077     {
00078     if (startBin == endBin)
00079         return binOffsets[i] + startBin;
00080     startBin >>= _binNextShift;
00081     endBin >>= _binNextShift;
00082     }
00083 errAbort("start %d, end %d out of range in findBin (max is 512M)", start, end);
00084 return 0;
00085 }
00086 
00087 static int binFromRangeExtended(int start, int end)
00088 /* Given start,end in chromosome coordinates assign it
00089  * a bin.   There's a bin for each 128k segment, for each
00090  * 1M segment, for each 8M segment, for each 64M segment,
00091  * for each 512M segment, and one top level bin for 4Gb.
00092  *      Note, since start and end are int's, the practical limit
00093  *      is up to 2Gb-1, and thus, only four result bins on the second
00094  *      level.
00095  * A range goes into the smallest bin it will fit in. */
00096 {
00097 int startBin = start, endBin = end-1, i;
00098 startBin >>= _binFirstShift;
00099 endBin >>= _binFirstShift;
00100 for (i=0; i<ArraySize(binOffsetsExtended); ++i)
00101     {
00102     if (startBin == endBin)
00103         return _binOffsetOldToExtended + binOffsetsExtended[i] + startBin;
00104     startBin >>= _binNextShift;
00105     endBin >>= _binNextShift;
00106     }
00107 errAbort("start %d, end %d out of range in findBin (max is 2Gb)", start, end);
00108 return 0;
00109 }
00110 
00111 int binFromRange(int start, int end)
00112 /* return bin that this start-end segment is in */
00113 {
00114 if (end <= BINRANGE_MAXEND_512M)
00115     return binFromRangeStandard(start, end);
00116 else
00117     return binFromRangeExtended(start, end);
00118 }
00119 
00120 static int binFromRangeBinKeeperExtended(int start, int end)
00121 /* This is just like binFromRangeExtended() above, but it doesn't limit
00122  * the answers to the range from _binOffsetOldToExtended and up.
00123  *      It simply uses the whole new bin scheme as if it was the only
00124  *      one.
00125  */
00126 {
00127 int startBin = start, endBin = end-1, i;
00128 startBin >>= _binFirstShift;
00129 endBin >>= _binFirstShift;
00130 for (i=0; i<ArraySize(binOffsetsExtended); ++i)
00131     {
00132     if (startBin == endBin)
00133         return binOffsetsExtended[i] + startBin;
00134     startBin >>= _binNextShift;
00135     endBin >>= _binNextShift;
00136     }
00137 errAbort("start %d, end %d out of range in findBin (max is 2Gb)", start, end);
00138 return 0;
00139 }
00140 
00141 struct binKeeper *binKeeperNew(int minPos, int maxPos)
00142 /* Create new binKeeper that can cover range. */
00143 {
00144 int binCount;
00145 struct binKeeper *bk;
00146 if (minPos < 0 || maxPos < 0 || minPos > maxPos)
00147     errAbort("bad range %d,%d in binKeeperNew", minPos, maxPos);
00148 
00149 binCount = binFromRangeBinKeeperExtended(maxPos-1, maxPos) + 1;
00150 AllocVar(bk);
00151 bk->minPos = minPos;
00152 bk->maxPos = maxPos;
00153 bk->binCount = binCount;
00154 AllocArray(bk->binLists, binCount);
00155 return bk;
00156 }
00157 
00158 void binKeeperFree(struct binKeeper **pBk)
00159 /* Free up a bin keeper. */
00160 {
00161 struct binKeeper *bk = *pBk;
00162 if (bk != NULL)
00163     {
00164     int i;
00165     for (i=0; i<bk->binCount; ++i)
00166         slFreeList(&bk->binLists[i]);
00167     freeMem(bk->binLists);
00168     freez(pBk);
00169     }
00170 }
00171 
00172 void binKeeperAdd(struct binKeeper *bk, int start, int end, void *val)
00173 /* Add item to binKeeper. */ 
00174 {
00175 int bin;
00176 struct binElement *el;
00177 if (start < bk->minPos || end > bk->maxPos || start > end)
00178     errAbort("(%d %d) out of range (%d %d) in binKeeperAdd", 
00179         start, end, bk->minPos, bk->maxPos);
00180 bin = binFromRangeBinKeeperExtended(start, end);
00181 assert(bin < bk->binCount);
00182 AllocVar(el);
00183 el->start = start;
00184 el->end = end;
00185 el->val = val;
00186 slAddHead(&bk->binLists[bin], el);
00187 }
00188 
00189 int binElementCmpStart(const void *va, const void *vb)
00190 /* Compare to sort based on start. */
00191 {
00192 const struct binElement *a = *((struct binElement **)va);
00193 const struct binElement *b = *((struct binElement **)vb);
00194 return a->start - b->start;
00195 }
00196 
00197 struct binElement *binKeeperFind(struct binKeeper *bk, int start, int end)
00198 /* Return a list of all items in binKeeper that intersect range.
00199  * Free this list with slFreeList. */
00200 {
00201 struct binElement *list = NULL, *newEl, *el;
00202 int startBin, endBin;
00203 int i,j;
00204 
00205 if (start < bk->minPos) start = bk->minPos;
00206 if (end > bk->maxPos) end = bk->maxPos;
00207 if (start >= end) return NULL;
00208 startBin = (start>>_binFirstShift);
00209 endBin = ((end-1)>>_binFirstShift);
00210 for (i=0; i<ArraySize(binOffsetsExtended); ++i)
00211     {
00212     int offset = binOffsetsExtended[i];
00213     for (j=startBin+offset; j<=endBin+offset; ++j)
00214         {
00215         for (el=bk->binLists[j]; el != NULL; el = el->next)
00216             {
00217             if (rangeIntersection(el->start, el->end, start, end) > 0)
00218                 {
00219                 newEl = CloneVar(el);
00220                 slAddHead(&list, newEl);
00221                 }
00222             }
00223         }
00224     startBin >>= _binNextShift;
00225     endBin >>= _binNextShift;
00226     }
00227 return list;
00228 }
00229 
00230 boolean binKeeperAnyOverlap(struct binKeeper *bk, int start, int end)
00231 /* Return TRUE if start/end overlaps with any items in binKeeper. */
00232 {
00233 struct binElement *el;
00234 int startBin, endBin;
00235 int i,j;
00236 
00237 if (start < bk->minPos) start = bk->minPos;
00238 if (end > bk->maxPos) end = bk->maxPos;
00239 if (start >= end) return FALSE;
00240 startBin = (start>>_binFirstShift);
00241 endBin = ((end-1)>>_binFirstShift);
00242 for (i=0; i<ArraySize(binOffsetsExtended); ++i)
00243     {
00244     int offset = binOffsetsExtended[i];
00245     for (j=startBin+offset; j<=endBin+offset; ++j)
00246         {
00247         for (el=bk->binLists[j]; el != NULL; el = el->next)
00248             {
00249             if (rangeIntersection(el->start, el->end, start, end) > 0)
00250                 {
00251                 return TRUE;
00252                 }
00253             }
00254         }
00255     startBin >>= _binNextShift;
00256     endBin >>= _binNextShift;
00257     }
00258 return FALSE;
00259 }
00260 
00261 void binKeeperReplaceVal(struct binKeeper *bk, int start, int end,
00262         void *oldVal, void *newVal)
00263 /* Replace occurences of old val in range from start->end with newVal */
00264 {
00265 struct binElement *el;
00266 int startBin, endBin;
00267 int i,j;
00268 
00269 if (start < bk->minPos) start = bk->minPos;
00270 if (end > bk->maxPos) end = bk->maxPos;
00271 if (start >= end) return;
00272 startBin = (start>>_binFirstShift);
00273 endBin = ((end-1)>>_binFirstShift);
00274 for (i=0; i<ArraySize(binOffsetsExtended); ++i)
00275     {
00276     int offset = binOffsetsExtended[i];
00277     for (j=startBin+offset; j<=endBin+offset; ++j)
00278         {
00279         for (el=bk->binLists[j]; el != NULL; el = el->next)
00280             {
00281             if (rangeIntersection(el->start, el->end, start, end) > 0)
00282                 {
00283                 if (el->val == oldVal)
00284                     {
00285                     el->val = newVal;
00286                     }
00287                 }
00288             }
00289         }
00290     startBin >>= _binNextShift;
00291     endBin >>= _binNextShift;
00292     }
00293 }
00294 
00295 
00296 struct binElement *binKeeperFindSorted(struct binKeeper *bk, int start, int end)
00297 /* Like binKeeperFind, but sort list on start coordinates. */
00298 {
00299 struct binElement *list = binKeeperFind(bk, start, end);
00300 slSort(&list, binElementCmpStart);
00301 return list;
00302 }
00303 
00304 struct binElement *binKeeperFindAll(struct binKeeper *bk)
00305 /* Get all elements sorted. */
00306 {
00307 return binKeeperFindSorted(bk, bk->minPos, bk->maxPos);
00308 }
00309 
00310 struct binElement *binKeeperFindLowest(struct binKeeper *bk, int start, int end)
00311 /* Find the lowest overlapping range. Quick even if search range large */
00312 {
00313 struct binElement *first = NULL, *el;
00314 int startBin = (start>>_binFirstShift), endBin = ((end-1)>>_binFirstShift);
00315 int i,j;
00316 
00317 /* Search the possible range of bins at each level, looking for lowest.  Once
00318  * an overlaping range is found at a level, continue with next level, however
00319  * must search an entire bin as they are not ordered. */
00320 for (i=0; i<ArraySize(binOffsetsExtended); ++i)
00321     {
00322     int offset = binOffsetsExtended[i];
00323     boolean foundOne = FALSE;
00324     for (j=startBin+offset; (j<=endBin+offset) && (!foundOne); ++j)
00325         {
00326         for (el=bk->binLists[j]; el != NULL; el = el->next)
00327             {
00328             if ((rangeIntersection(el->start, el->end, start, end) > 0)
00329                 && ((first == NULL) || (el->start < first->start)
00330                     || ((el->start == first->start)
00331                         && (el->end < first->end))))
00332                 {
00333                 first = el;
00334                 foundOne = TRUE;
00335                 }
00336             }
00337         }
00338     startBin >>= _binNextShift;
00339     endBin >>= _binNextShift;
00340     }
00341 return first;
00342 }
00343 
00344 
00345 void binKeeperRemove(struct binKeeper *bk, int start, int end, void *val)
00346 /* Remove item from binKeeper. */ 
00347 {
00348 int bin = binFromRangeBinKeeperExtended(start, end);
00349 struct binElement **pList = &bk->binLists[bin], *newList = NULL, *el, *next;
00350 for (el = *pList; el != NULL; el = next)
00351     {
00352     next = el->next;
00353     if (el->val == val && el->start == start && el->end == end)
00354         {
00355         freeMem(el);
00356         }
00357     else
00358         {
00359         slAddHead(&newList, el);
00360         }
00361     }
00362 slReverse(&newList);
00363 *pList = newList;
00364 }
00365 
00366 struct binKeeperCookie binKeeperFirst(struct binKeeper *bk)
00367 /* Return an object to use by binKeeperNext() to traverse the binElements.
00368  * The first call to binKeeperNext() will return the first entry in the
00369  * table. */
00370 {
00371 struct binKeeperCookie cookie;
00372 cookie.bk = bk;
00373 cookie.blIdx = 0;
00374 cookie.nextBel = bk->binLists[0];
00375 return cookie;
00376 }
00377 
00378 struct binElement* binKeeperNext(struct binKeeperCookie *cookie)
00379 /* Return the next entry in the binKeeper table.  */
00380 {
00381 /* if we don't have a next, move down bin list until we find one */
00382 while ((cookie->nextBel == NULL)
00383        && (++cookie->blIdx < cookie->bk->binCount))
00384     cookie->nextBel = cookie->bk->binLists[cookie->blIdx];
00385 if (cookie->blIdx >= cookie->bk->binCount)
00386     return NULL;  /* no more */
00387 else
00388     {
00389     struct binElement* bel = cookie->nextBel;
00390     cookie->nextBel = cookie->nextBel->next;
00391     return bel;
00392     }
00393 }

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2