lib/hash.c

Go to the documentation of this file.
00001 /* Hash.c - implements hashing. 
00002  *
00003  * This file is copyright 2002 Jim Kent, but license is hereby
00004  * granted for all use - public, private or commercial. */
00005 
00006 #include "common.h"
00007 #include "localmem.h"
00008 #include "hash.h"
00009 #include "obscure.h"
00010 #include "dystring.h"
00011 
00012 static char const rcsid[] = "$Id: hash.c,v 1.36 2007/04/14 17:07:27 markd Exp $";
00013 
00014 /*
00015  * Hash a string key.  This code is taken from Tcl interpreter. I was borrowed
00016  * after discovering a lot of collisions and poor utilization of the table
00017  * when hashing accessions.
00018  *
00019  * This function was compared to Bob Jenkins' lookup2 hash function and
00020  * (http://burtleburtle.net/bob/hash/) and Paul Hsieh's SuperFast
00021  * hash function (http://www.azillionmonkeys.com/qed/hash.html).
00022  * Both of those functions provided better utilization of the table,
00023  * but were also more expensive, so the Tcl function was used.
00024  * If hashing of binary keys is implemented, SuperFast hash should
00025  * be considered.
00026  *
00027  * for an explanation of this function, see HashStringKey() in the
00028  * Tcl source file, generic/tclHash.c, available from
00029  * http://tcl.sourceforge.net/.
00030  *
00031  * The Tcl code is:
00032  * Copyright (c) 1991-1993 The Regents of the University of California.
00033  * Copyright (c) 1994 Sun Microsystems, Inc.
00034  *
00035  * See the file "license.terms" (in the Tcl distribution) for complete
00036  * license (which is a BSD-style license).
00037  *
00038  * Since hashCrc() is used elsewhere in the code, it was left alone,
00039  * and a new function create for use in hash table.
00040  * -- markd
00041  */
00042 bits32 hashString(char *string)
00043 /* Compute a hash value of a string. */
00044 {
00045 char *keyStr = string;
00046 unsigned int result = 0;
00047 int c;
00048 
00049 while ((c = *keyStr++) != '\0')
00050     {
00051     result += (result<<3) + c;
00052     }
00053 return result;
00054 }
00055 
00056 bits32 hashCrc(char *string)
00057 /* Returns a CRC value on string. */
00058 {
00059 unsigned char *us = (unsigned char *)string;
00060 unsigned char c;
00061 bits32 shiftAcc = 0;
00062 bits32 addAcc = 0;
00063 
00064 while ((c = *us++) != 0)
00065     {
00066     shiftAcc <<= 2;
00067     shiftAcc += c;
00068     addAcc += c;
00069     }
00070 return shiftAcc + addAcc;
00071 }
00072 
00073 struct hashEl *hashLookup(struct hash *hash, char *name)
00074 /* Looks for name in hash table. Returns associated element,
00075  * if found, or NULL if not. */
00076 {
00077 struct hashEl *el = hash->table[hashString(name)&hash->mask];
00078 while (el != NULL)
00079     {
00080     if (strcmp(el->name, name) == 0)
00081         break;
00082     el = el->next;
00083     }
00084 return el;
00085 }
00086 
00087 struct hashEl *hashLookupUpperCase(struct hash *hash, char *name)
00088 /* Lookup upper cased name in hash. (Assumes all elements of hash
00089  * are themselves already in upper case.) */
00090 {
00091 char s[256];
00092 safef(s, sizeof(s), "%s", name);
00093 touppers(s);
00094 return hashLookup(hash, s);
00095 }
00096 
00097 
00098 struct hashEl *hashLookupNext(struct hashEl *hashEl)
00099 /* Find the next occurance of name that may occur in the table multiple times,
00100  * or NULL if not found. */
00101 {
00102 struct hashEl *el = hashEl->next;
00103 while (el != NULL)
00104     {
00105     if (strcmp(el->name, hashEl->name) == 0)
00106         break;
00107     el = el->next;
00108     }
00109 return el;
00110 }
00111 
00112 struct hashEl *hashAddN(struct hash *hash, char *name, int nameSize, void *val)
00113 /* Add name of given size to hash (no need to be zero terminated) */
00114 {
00115 struct hashEl *el = lmAlloc(hash->lm, sizeof(*el));
00116 int hashVal = (hashString(name)&hash->mask);
00117 el->name = lmAlloc(hash->lm, nameSize+1);
00118 memcpy(el->name, name, nameSize);
00119 el->val = val;
00120 el->next = hash->table[hashVal];
00121 hash->table[hashVal] = el;
00122 hash->elCount += 1;
00123 return el;
00124 }
00125 
00126 struct hashEl *hashAdd(struct hash *hash, char *name, void *val)
00127 /* Add new element to hash table. */
00128 {
00129 return hashAddN(hash, name, strlen(name), val);
00130 }
00131 
00132 boolean hashMayRemove(struct hash *hash, char *name)
00133 /* Remove item of the given name from hash table, if present.
00134  * Return true if it was present */
00135 {
00136 return (hashRemove(hash, name) != NULL);
00137 }
00138 
00139 void hashMustRemove(struct hash *hash, char *name)
00140 /* Remove item of the given name from hash table, or error
00141  * if not present */
00142 {
00143 if (hashRemove(hash, name) == NULL)
00144     errAbort("attempt to remove non-existant %s from hash", name);
00145 }
00146 
00147 void *hashRemove(struct hash *hash, char *name)
00148 /* Remove item of the given name from hash table. 
00149  * Returns value of removed item, or NULL if not in the table. */
00150 {
00151 struct hashEl *hel;
00152 void *ret;
00153 struct hashEl **pBucket = &hash->table[hashString(name)&hash->mask];
00154 for (hel = *pBucket; hel != NULL; hel = hel->next)
00155     if (sameString(hel->name, name))
00156         break;
00157 if (hel == NULL)
00158     return NULL;
00159 ret = hel->val;
00160 if (slRemoveEl(pBucket, hel))
00161     hash->elCount -= 1;
00162 return ret;
00163 }
00164 
00165 struct hashEl *hashAddUnique(struct hash *hash, char *name, void *val)
00166 /* Add new element to hash table. Squawk and die if not unique */
00167 {
00168 if (hashLookup(hash, name) != NULL)
00169     errAbort("%s duplicated, aborting", name);
00170 return hashAdd(hash, name, val);
00171 }
00172 
00173 struct hashEl *hashAddSaveName(struct hash *hash, char *name, void *val, char **saveName)
00174 /* Add new element to hash table.  Save the name of the element, which is now
00175  * allocated in the hash table, to *saveName.  A typical usage would be:
00176  *    AllocVar(el);
00177  *    hashAddSaveName(hash, name, el, &el->name);
00178  */
00179 {
00180 struct hashEl *hel = hashAdd(hash, name, val);
00181 *saveName = hel->name;
00182 return hel;
00183 }
00184 
00185 struct hashEl *hashStore(struct hash *hash, char *name)
00186 /* If element in hash already return it, otherwise add it
00187  * and return it. */
00188 {
00189 struct hashEl *hel;
00190 if ((hel = hashLookup(hash, name)) != NULL)
00191     return hel;
00192 return hashAdd(hash, name, NULL);
00193 }
00194 
00195 char  *hashStoreName(struct hash *hash, char *name)
00196 /* If element in hash already return it, otherwise add it
00197  * and return it. */
00198 {
00199 struct hashEl *hel;
00200 if (name == NULL)
00201     return NULL;
00202 if ((hel = hashLookup(hash, name)) != NULL)
00203     return hel->name;
00204 return hashAdd(hash, name, NULL)->name;
00205 }
00206 
00207 int hashIntVal(struct hash *hash, char *name)
00208 /* Find size of name in hash or die trying. */
00209 {
00210 void *val = hashMustFindVal(hash, name);
00211 return ptToInt(val);
00212 }
00213 
00214 int hashIntValDefault(struct hash *hash, char *name, int defaultInt)
00215 /* Return integer value associated with name in a simple 
00216  * hash of ints or defaultInt if not found. */
00217 {
00218 struct hashEl *hel = hashLookup(hash, name);
00219 if(hel == NULL)
00220     return defaultInt;
00221 return ptToInt(hel->val);
00222 }
00223 
00224 void *hashMustFindVal(struct hash *hash, char *name)
00225 /* Lookup name in hash and return val.  Abort if not found. */
00226 {
00227 struct hashEl *hel = hashLookup(hash, name);
00228 if (hel == NULL)
00229     errAbort("%s not found", name);
00230 return hel->val;
00231 }
00232 
00233 void *hashFindVal(struct hash *hash, char *name)
00234 /* Look up name in hash and return val or NULL if not found. */
00235 {
00236 struct hashEl *hel = hashLookup(hash, name);
00237 if (hel == NULL)
00238     return NULL;
00239 return hel->val;
00240 }
00241 
00242 void *hashOptionalVal(struct hash *hash, char *name, void *usual)
00243 /* Look up name in hash and return val, or usual if not found. */
00244 {
00245 struct hashEl *hel = hashLookup(hash, name);
00246 if (hel == NULL)
00247     return usual;
00248 else
00249     return hel->val;
00250 }
00251 
00252 char *hashMustFindName(struct hash *hash, char *name)
00253 /* Return name as stored in hash table (in hel->name). 
00254  * Abort if not found. */
00255 {
00256 struct hashEl *hel = hashLookup(hash, name);
00257 if (hel == NULL)
00258     errAbort("%s not found", name);
00259 return hel->name;
00260 }
00261 
00262 struct hashEl *hashAddInt(struct hash *hash, char *name, int val)
00263 /* Store integer value in hash */
00264 {
00265 char *pt = NULL;
00266 return hashAdd(hash, name, pt + val);
00267 }
00268 
00269 long long hashIntSum(struct hash *hash)
00270 /* Return sum of all the ints in a hash of ints. */
00271 {
00272 long long sum = 0;
00273 int i;
00274 struct hashEl *hel;
00275 for (i=0; i<hash->size; ++i)
00276     {
00277     for (hel = hash->table[i]; hel != NULL; hel = hel->next)
00278         {
00279         int num = ptToInt(hel->val);
00280         sum += (long long)num;
00281         }
00282     }
00283 return sum;
00284 }
00285 
00286 struct hash *newHash(int powerOfTwoSize)
00287 /* Returns new hash table. */
00288 {
00289 struct hash *hash = needMem(sizeof(*hash));
00290 int memBlockPower = 16;
00291 if (powerOfTwoSize == 0)
00292     powerOfTwoSize = 12;
00293 assert(powerOfTwoSize <= hashMaxSize && powerOfTwoSize > 0);
00294 hash->powerOfTwoSize = powerOfTwoSize;
00295 hash->size = (1<<powerOfTwoSize);
00296 /* Make size of memory block for allocator vary between
00297  * 256 bytes and 64k depending on size of table. */
00298 if (powerOfTwoSize < 8)
00299     memBlockPower = 8;
00300 else if (powerOfTwoSize < 16)
00301     memBlockPower = powerOfTwoSize;
00302 hash->lm = lmInit(1<<memBlockPower);
00303 hash->mask = hash->size-1;
00304 hash->table = lmAlloc(hash->lm, sizeof(struct hashEl *) * hash->size);
00305 return hash;
00306 }
00307 
00308 struct hash *hashFromSlNameList(void *list)
00309 /* Create a hash out of a list of slNames or any kind of list where the */
00310 /* first field is the next pointer and the second is the name. */
00311 {
00312 struct hash *hash = NULL;
00313 struct slName *namedList = list, *item;
00314 if (!list)
00315     return NULL;
00316 hash = newHash(0);
00317 for (item = namedList; item != NULL; item = item->next)
00318     hashAdd(hash, item->name, item);
00319 return hash;
00320 }
00321 
00322 void hashTraverseEls(struct hash *hash, void (*func)(struct hashEl *hel))
00323 /* Apply func to every element of hash with hashEl as parameter. */
00324 {
00325 int i;
00326 struct hashEl *hel;
00327 for (i=0; i<hash->size; ++i)
00328     {
00329     for (hel = hash->table[i]; hel != NULL; hel = hel->next)
00330         func(hel);
00331     }
00332 }
00333 
00334 void hashTraverseVals(struct hash *hash, void (*func)(void *val))
00335 /* Apply func to every element of hash with hashEl->val as parameter. */
00336 {
00337 int i;
00338 struct hashEl *hel;
00339 for (i=0; i<hash->size; ++i)
00340     {
00341     for (hel = hash->table[i]; hel != NULL; hel = hel->next)
00342         func(hel->val);
00343     }
00344 }
00345 
00346 int hashElCmp(const void *va, const void *vb)
00347 /* Compare two hashEl by name. */
00348 {
00349 const struct hashEl *a = *((struct hashEl **)va);
00350 const struct hashEl *b = *((struct hashEl **)vb);
00351 return strcmp(a->name, b->name);
00352 }
00353 
00354 void *hashElFindVal(struct hashEl *list, char *name)
00355 /* Look up name in hashEl list and return val or NULL if not found. */
00356 {
00357 struct hashEl *el;
00358 for (el = list; el != NULL; el = el->next)
00359     {
00360     if (strcmp(el->name, name) == 0)
00361         return el->val;
00362     }
00363 return NULL;
00364 }
00365 
00366 struct hashEl *hashElListHash(struct hash *hash)
00367 /* Return a list of all elements of hash.   Free return with hashElFreeList. */
00368 {
00369 int i;
00370 struct hashEl *hel, *dupe, *list = NULL;
00371 for (i=0; i<hash->size; ++i)
00372     {
00373     for (hel = hash->table[i]; hel != NULL; hel = hel->next)
00374         {
00375         dupe = CloneVar(hel);
00376         slAddHead(&list, dupe);
00377         }
00378     }
00379 return list;
00380 }
00381 
00382 
00383 void hashElFree(struct hashEl **pEl)
00384 /* Free hash el list returned from hashListAll.  (Don't use
00385  * this internally.) */
00386 {
00387 freez(pEl);
00388 }
00389 
00390 void hashElFreeList(struct hashEl **pList)
00391 /* Free hash el list returned from hashListAll.  (Don't use
00392  * this internally. */
00393 {
00394 slFreeList(pList);
00395 }
00396 
00397 struct hashCookie hashFirst(struct hash *hash)
00398 /* Return an object to use by hashNext() to traverse the hash table.
00399  * The first call to hashNext will return the first entry in the table. */
00400 {
00401 struct hashCookie cookie;
00402 cookie.hash = hash;
00403 cookie.idx = 0;
00404 cookie.nextEl = NULL;
00405 
00406 /* find first entry */
00407 for (cookie.idx = 0;
00408      (cookie.idx < hash->size) && (hash->table[cookie.idx] == NULL);
00409      cookie.idx++)
00410     continue;  /* empty body */
00411 if (cookie.idx < hash->size)
00412     cookie.nextEl = hash->table[cookie.idx];
00413 return cookie;
00414 }
00415 
00416 struct hashEl* hashNext(struct hashCookie *cookie)
00417 /* Return the next entry in the hash table, or NULL if no more. Do not modify
00418  * hash table while this is being used. */
00419 {
00420 /* NOTE: if hashRemove were coded to track the previous entry during the
00421  * search and then use it to do the remove, it would be possible to
00422  * remove the entry returned by this method */
00423 struct hashEl *retEl = cookie->nextEl;
00424 if (retEl == NULL)
00425     return NULL;  /* no more */
00426 
00427 /* find next entry */
00428 cookie->nextEl = retEl->next;
00429 if (cookie->nextEl == NULL)
00430     {
00431     for (cookie->idx++; (cookie->idx < cookie->hash->size)
00432              && (cookie->hash->table[cookie->idx] == NULL); cookie->idx++)
00433         continue;  /* empty body */
00434     if (cookie->idx < cookie->hash->size)
00435         cookie->nextEl = cookie->hash->table[cookie->idx];
00436     }
00437 return retEl;
00438 }
00439 
00440 void* hashNextVal(struct hashCookie *cookie)
00441 /* Return the next value in the hash table, or NULL if no more. Do not modify
00442  * hash table while this is being used. */
00443 {
00444 struct hashEl *hel = hashNext(cookie);
00445 if (hel == NULL)
00446     return NULL;
00447 else
00448     return hel->val;
00449 }
00450 
00451 char *hashNextName(struct hashCookie *cookie)
00452 /* Return the next name in the hash table, or NULL if no more. Do not modify
00453  * hash table while this is being used. */
00454 {
00455 struct hashEl *hel = hashNext(cookie);
00456 if (hel == NULL)
00457     return NULL;
00458 else
00459     return hel->name;
00460 }
00461 
00462 void freeHash(struct hash **pHash)
00463 /* Free up hash table. */
00464 {
00465 struct hash *hash = *pHash;
00466 if (hash == NULL)
00467     return;
00468 lmCleanup(&hash->lm);
00469 freez(pHash);
00470 }
00471 
00472 
00473 void freeHashAndVals(struct hash **pHash)
00474 /* Free up hash table and all values associated with it.
00475  * (Just calls freeMem on each hel->val) */
00476 {
00477 struct hash *hash;
00478 if ((hash = *pHash) != NULL)
00479     {
00480     hashTraverseVals(hash, freeMem);
00481     freeHash(pHash);
00482     }
00483 }
00484 
00485 void hashFreeWithVals(struct hash **pHash, void (freeFunc)())
00486 /* Free up hash table and all values associated with it. freeFunc is a
00487  * function to free an entry, should take a pointer to a pointer to an
00488  * entry. */
00489 {
00490 struct hash *hash = *pHash;
00491 if (hash != NULL)
00492     {
00493     struct hashCookie cookie = hashFirst(hash);
00494     struct hashEl *hel;
00495     while ((hel = hashNext(&cookie)) != NULL)
00496         freeFunc(&hel->val);
00497     hashFree(pHash);
00498     }
00499 }
00500 
00501 void hashFreeList(struct hash **pList)
00502 /* Free up a list of hashes. */
00503 {
00504 struct hash *el, *next;
00505 
00506 for (el = *pList; el != NULL; el = next)
00507     {
00508     next = el->next;
00509     hashFree(&el);
00510     }
00511 *pList = NULL;
00512 }
00513 
00514 static int bucketLen(struct hashEl *hel)
00515 /* determine how many elements are in a hash bucket */
00516 {
00517 int nel = 0;
00518 for (; hel != NULL; hel = hel->next)
00519     nel++;
00520 return nel;
00521 }
00522 
00523 void hashHisto(struct hash *hash, char *fname)
00524 /* Output bucket usage counts to a file for producing a histogram  */
00525 {
00526 FILE* fh = mustOpen(fname, "w");
00527 int i;
00528 
00529 for (i=0; i<hash->size; ++i)
00530     fprintf(fh, "%d\n", bucketLen(hash->table[i]));
00531 carefulClose(&fh);
00532 }
00533 
00534 struct hashEl *hashReplace(struct hash *hash, char *name, void *val)
00535 /* Replace an existing element in hash table, or add it if not present. */
00536 {
00537 if (hashLookup(hash, name))
00538     hashRemove(hash, name);
00539 return hashAdd(hash, name, val);
00540 }
00541 
00542 char *hashToRaString(struct hash *hash)
00543 /* Convert hash to string in ra format. */
00544 {
00545 struct hashEl *el, *list = hashElListHash(hash);
00546 struct dyString *dy = dyStringNew(0);
00547 slSort(&list, hashElCmp);
00548 for (el = list; el != NULL; el = el->next)
00549    {
00550    dyStringAppend(dy, el->name);
00551    dyStringAppendC(dy, ' ');
00552    dyStringAppend(dy, el->val);
00553    dyStringAppendC(dy, '\n');
00554    }
00555 hashElFreeList(&list);
00556 return dyStringCannibalize(&dy);
00557 }

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2