lib/obscure.c

Go to the documentation of this file.
00001 /* Obscure stuff that is handy every now and again. 
00002  *
00003  * This file is copyright 2002 Jim Kent, but license is hereby
00004  * granted for all use - public, private or commercial. */
00005 
#include "common.h"
#include <stdint.h>
#include <unistd.h>
#include "portable.h"
#include "localmem.h"
#include "hash.h"
#include "obscure.h"
#include "linefile.h"
00013 
/* Revision-control identification string for this file. */
static const char rcsid[] = "$Id: obscure.c,v 1.44 2007/03/02 00:45:01 kent Exp $";

/* Number of dotForUser() calls per dot written; changed by dotForUserInit(). */
static int _dotForUserMod = 100;
00016 
long incCounterFile(char *fileName)
/* Increment a counter kept on disk and return the new value.
 * The counter is stored as the raw bytes of a long (sizeof(long) bytes,
 * native byte order).  If the file can't be opened for update it is
 * created and the count starts at 1.  If the file can't be opened at
 * all nothing is written and the incremented in-memory value is still
 * returned.  Aborts if the new value can't be written or the file
 * can't be closed. */
{
long val = 0;
FILE *f = fopen(fileName, "r+b");
if (f != NULL)
    {
    /* A short read (for instance an empty file) is tolerated here:
     * the count then continues from whatever was read into val. */
    fread(&val, sizeof(val), 1, f);
    rewind(f);
    }
else
    {
    f = fopen(fileName, "wb");
    }
++val;
if (f != NULL)
    {
    /* Don't let a failed update pass silently. */
    if (fwrite(&val, sizeof(val), 1, f) != 1)
        errnoAbort("fwrite failed");
    if (fclose(f) != 0)
        errnoAbort("fclose failed");
    }
return val;
}
00040 
int digitsBaseTwo(unsigned long x)
/* Return how many binary digits it takes to write x.
 * Zero takes none, so the result for 0 is 0. */
{
int bitCount;
for (bitCount = 0; x != 0; x >>= 1)
    ++bitCount;
return bitCount;
}
00052 
int digitsBaseTen(int x)
/* Return number of characters needed to print x in base 10.
 * A leading minus sign counts as one character, so -5 gives 2. */
{
int digCount = 1;
unsigned int magnitude;
if (x < 0)
    {
    digCount = 2;
    /* Negate in unsigned arithmetic: -x overflows for INT_MIN. */
    magnitude = 0u - (unsigned int)x;
    }
else
    magnitude = (unsigned int)x;
while (magnitude >= 10)
    {
    digCount += 1;
    magnitude /= 10;
    }
return digCount;
}
00069 
void writeGulp(char *file, char *buf, int size)
/* Open file with mode "w", write the size bytes starting at buf
 * to it, then close it. */
{
FILE *out = mustOpen(file, "w");
mustWrite(out, buf, size);
carefulClose(&out);
}
00077 
void readInGulp(char *fileName, char **retBuf, size_t *retSize)
/* Load an entire file into one freshly allocated buffer.
 * *retBuf receives the buffer, which holds the file contents followed
 * by one extra zero byte so it can be used as a string.  If retSize
 * is not NULL it receives the file size (not counting the extra zero). */
{
size_t byteCount = (size_t)fileSize(fileName);
FILE *in = mustOpen(fileName, "rb");
char *contents = needLargeMem(byteCount+1);

*retBuf = contents;
mustRead(in, contents, byteCount);
contents[byteCount] = 0;      /* Terminator, in case caller treats it as a string. */
fclose(in);
if (retSize != NULL)
    *retSize = byteCount;
}
00091 
void readAllWords(char *fileName, char ***retWords, int *retWordCount, char **retBuf)
/* Read in whole file and break it into white space separated words.
 * *retWords gets an array of pointers into *retBuf (NULL if the file
 * has no words), *retWordCount the number of words.  You need to
 * freeMem both *retWords and *retBuf when done. */
{
char *text = NULL;
char **wordArray = NULL;
size_t textSize;
int total;

readInGulp(fileName, &text, &textSize);
/* First pass with a NULL array only counts the words. */
total = chopByWhite(text, NULL, 0);
if (total != 0)
    {
    wordArray = needMem(total * sizeof(wordArray[0]));
    chopByWhite(text, wordArray, total);
    }
*retBuf = text;
*retWordCount = total;
*retWords = wordArray;
}
00112 
00113 int countWordsInFile(char *fileName)
00114 /* Count number of words in file. */
00115 {
00116 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00117 char *line;
00118 int wordCount = 0;
00119 while (lineFileNext(lf, &line, NULL))
00120     wordCount += chopByWhite(line, NULL, 0);
00121 lineFileClose(&lf);
00122 return wordCount;
00123 }
00124 
00125 struct hash *hashWordsInFile(char *fileName, int hashSize)
00126 /* Create a hash of space delimited words in file. */
00127 {
00128 struct hash *hash = newHash(hashSize);
00129 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00130 char *line, *word;
00131 while (lineFileNext(lf, &line, NULL))
00132     {
00133     while ((word = nextWord(&line)) != NULL)
00134         hashAdd(hash, word, NULL);
00135     }
00136 lineFileClose(&lf);
00137 return hash;
00138 }
00139 
00140 struct hash *hashNameIntFile(char *fileName)
00141 /* Given a two column file (name, integer value) return a
00142  * hash keyed by name with integer values */
00143 {
00144 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00145 char *row[2];
00146 struct hash *hash = hashNew(16);
00147 while (lineFileRow(lf, row))
00148     hashAddInt(hash, row[0], lineFileNeedNum(lf, row, 1));
00149 lineFileClose(&lf);
00150 return hash;
00151 }
00152 
00153 struct hash *hashTwoColumnFile(char *fileName)
00154 /* Given a two column file (key, value) return a hash. */
00155 {
00156 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00157 char *row[2];
00158 struct hash *hash = hashNew(16);
00159 while (lineFileRow(lf, row))
00160     {
00161     char *name = row[0];
00162     char *value = lmCloneString(hash->lm, row[1]);
00163     hashAdd(hash, name, value);
00164     }
00165 lineFileClose(&lf);
00166 return hash;
00167 }
00168 
00169 struct slName *readAllLines(char *fileName)
00170 /* Read all lines of file into a list.  (Removes trailing carriage return.) */
00171 {
00172 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00173 struct slName *list = NULL, *el;
00174 char *line;
00175 
00176 while (lineFileNext(lf, &line, NULL))
00177      {
00178      el = newSlName(line);
00179      slAddHead(&list, el);
00180      }
00181 slReverse(&list);
00182 return list;
00183 }
00184 
void copyFile(char *source, char *dest)
/* Copy file from source to dest.  The destination is made with
 * creat(dest, 0777).  Aborts with a message if either file can't be
 * opened, on a read or write error, or if closing dest fails. */
{
int bufSize = 64*1024;
char *buf = needMem(bufSize);
int bytesRead;
int s, d;

s = open(source, O_RDONLY);
if (s < 0)
    errAbort("Couldn't open %s. %s\n", source, strerror(errno));
d = creat(dest, 0777);
if (d < 0)
    {
    int openErr = errno;    /* close() may change errno, so save it first. */
    close(s);
    errAbort("Couldn't open %s. %s\n", dest, strerror(openErr));
    }
while ((bytesRead = read(s, buf, bufSize)) > 0)
    {
    int offset = 0;
    /* write() may take fewer bytes than asked for; keep going until
     * the whole chunk is out. */
    while (offset < bytesRead)
        {
        int written = write(d, buf + offset, bytesRead - offset);
        if (written <= 0)
            errAbort("Write error on %s. %s\n", dest, strerror(errno));
        offset += written;
        }
    }
if (bytesRead < 0)
    errAbort("Read error on %s. %s\n", source, strerror(errno));
close(s);
if (close(d) != 0)
    errnoAbort("close failed");
freeMem(buf);
}
00212 
void copyOpenFile(FILE *inFh, FILE *outFh)
/* Copy everything that remains in the open stdio stream inFh to
 * outFh, one character at a time.  Aborts if either stream has its
 * error indicator set afterwards. */
{
for (;;)
    {
    int ch = fgetc(inFh);
    if (ch == EOF)
        break;
    fputc(ch, outFh);
    }
if (ferror(inFh))
    errnoAbort("file read failed");
if (ferror(outFh))
    errnoAbort("file write failed");
}
00224 
void cpFile(int s, int d)
/* Copy from source file descriptor s to dest file descriptor d until
 * reach end of file (or a read error, which ends the copy quietly).
 * Aborts if data that was read can't be written. */
{
int bufSize = 64*1024, readSize;
char *buf = needMem(bufSize);

while ((readSize = read(s, buf, bufSize)) > 0)
    {
    int offset = 0;
    /* write() may take fewer bytes than asked for; loop until the
     * whole chunk is written rather than dropping the remainder. */
    while (offset < readSize)
        {
        int written = write(d, buf + offset, readSize - offset);
        if (written <= 0)
            errnoAbort("write failed");
        offset += written;
        }
    }
freeMem(buf);
}
00241 
void *intToPt(int i)
/* Convert integer to pointer. Use when really want to store an
 * int in a pointer field.  The result is only meant to be turned
 * back into an int with ptToInt(), never dereferenced. */
{
/* Cast through intptr_t rather than doing arithmetic on a null
 * pointer, which is undefined behavior. */
return (void *)(intptr_t)i;
}
00249 
int ptToInt(void *pt)
/* Convert pointer to integer.  Use to get back an int that was
 * stored in a pointer field with intToPt().  A pointer value that
 * doesn't fit in an int is truncated. */
{
/* Cast through intptr_t rather than subtracting unrelated pointers,
 * which is undefined behavior. */
return (int)(intptr_t)pt;
}
00257 
void *sizetToPt(size_t i)
/* Convert size_t to pointer. Use when really want to store a
 * size_t in a pointer.  The result is only meant to be turned back
 * into a size_t with ptToSizet(), never dereferenced. */
{
/* Cast through uintptr_t rather than doing arithmetic on a null
 * pointer, which is undefined behavior. */
return (void *)(uintptr_t)i;
}
00265 
size_t ptToSizet(void *pt)
/* Convert pointer to size_t.  Use to get back a size_t that was
 * stored in a pointer with sizetToPt(). */
{
/* Cast through uintptr_t rather than subtracting unrelated pointers,
 * which is undefined behavior. */
return (size_t)(uintptr_t)pt;
}
00273 
00274 boolean parseQuotedString( char *in, char *out, char **retNext)
00275 /* Read quoted string from in (which should begin with first quote).
00276  * Write unquoted string to out, which may be the same as in.
00277  * Return pointer to character past end of string in *retNext. 
00278  * Return FALSE if can't find end. */
00279 {
00280 char c, *s = in;
00281 int quoteChar = *s++;
00282 boolean escaped = FALSE;
00283 
00284 for (;;)
00285    {
00286    c = *s++;
00287    if (c == 0)
00288        {
00289        warn("Unmatched %c", quoteChar);
00290        return FALSE;
00291        }
00292    if (escaped)
00293        {
00294        if (c == '\\' || c == quoteChar)
00295           *out++ = c;
00296        else
00297           {
00298           *out++ = '\\';
00299           *out++ = c;
00300           }
00301        escaped = FALSE;
00302        }
00303    else
00304        {
00305        if (c == '\\')
00306            escaped = TRUE;
00307        else if (c == quoteChar)
00308            break;
00309        else
00310            *out++ = c;
00311        }
00312    }
00313 *out = 0;
00314 if (retNext != NULL)
00315     *retNext = s;
00316 return TRUE;
00317 }
00318 
char *nextQuotedWord(char **pLine)
/* Like nextWord, but a word that starts with a single or double
 * quote runs to the matching quote and is returned with the quotes
 * removed.  Updates *pLine to point past what was returned.
 * Returns NULL at end of line or if a quote is left unclosed. */
{
char *start = skipLeadingSpaces(*pLine);

if (start == NULL || start[0] == 0)
    return NULL;
if (start[0] != '"' && start[0] != '\'')
    return nextWord(pLine);
/* Unquote in place; parseQuotedString advances *pLine for us. */
return parseQuotedString(start, start, pLine) ? start : NULL;
}
00341 
void escCopy(char *in, char *out, char toEscape, char escape)
/* Copy zero terminated string in to out, putting the escape
 * character in front of every toEscape character.  The terminator
 * is copied too.  Out must be big enough; the worst case is
 * strlen(in)*2 + 1 bytes. */
{
char ch;
do
    {
    ch = *in++;
    if (ch == toEscape)
        *out++ = escape;
    *out++ = ch;
    }
while (ch != 0);
}
00357 
char *makeEscapedString(char *in, char toEscape)
/* Return a newly allocated copy of in where each toEscape character
 * has a backslash in front of it.  freeMem the result when done. */
{
int plainLen = strlen(in);
int escapeCount = countChars(in, toEscape);
char *result = needMem(plainLen + escapeCount + 1);
escCopy(in, result, toEscape, '\\');
return result;
}
00368 
char *makeQuotedString(char *in, char quoteChar)
/* Create a string surrounded by quoteChar, with internal
 * quoteChars escaped by a backslash.  freeMem result when done. */
{
/* Two quotes, the text itself, and one backslash per inner quote. */
int newSize = 2 + strlen(in) + countChars(in, quoteChar);
char *out = needMem(newSize+1);
out[0] = quoteChar;
escCopy(in, out+1, quoteChar, '\\');
/* The closing quote lands on the terminator escCopy wrote, so put a
 * terminator after it explicitly instead of counting on the
 * allocator having zeroed the buffer. */
out[newSize-1] = quoteChar;
out[newSize] = 0;
return out;
}
00380 
struct hash *hashVarLine(char *line, int lineIx)
/* Return a symbol table from a line of form:
 *   var1=val1 var2='quoted val2' var3="another val" 
 * Each value in the hash is a separately cloned string.  The line
 * itself is not modified; parsing is done on a private copy.
 * lineIx is only used in error messages.  Aborts if a variable
 * doesn't start with a letter, has no '=', or has an unclosed quote. */
{
char *dupe = cloneString(line);
char *s = dupe, c;
char *var, *val;
struct hash *hash = newHash(8);

for (;;)
    {
    if ((var = skipLeadingSpaces(s)) == NULL)
        break;

    if ((c = *var) == 0)
        break;
    /* ctype functions need an unsigned char value; a plain char may
     * be negative for bytes above 127. */
    if (!isalpha((unsigned char)c))
        errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var);
    val = strchr(var, '=');
    if (val == NULL)
        {
        errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line);
        }
    *val++ = 0;         /* Terminate the variable name at the '='. */
    c = *val;
    if (c == '\'' || c == '"')
        {
        /* Unquote the value in place; s ends up past the closing quote. */
        if (!parseQuotedString(val, val, &s))
            errAbort("line %d of input: missing closing %c", lineIx, c);
        }
    else
        {
        s = skipToSpaces(val);
        if (s != NULL) *s++ = 0;
        }
    hashAdd(hash, var, cloneString(val));
    }
freez(&dupe);
return hash;
}
00421 
struct slName *stringToSlNames(char *string)
/* Split string on white space into a list of slNames.  A piece that
 * starts with a single or double quote runs to the matching quote,
 * so it may hold several words; the quotes are not kept.  The input
 * string is left untouched.  Aborts on an unclosed quote. */
{
struct slName *names = NULL, *item;
char *scratch = cloneString(string);
char *cur = scratch, *next;

while ((cur = skipLeadingSpaces(cur)) != NULL && *cur != 0)
    {
    char first = *cur;
    if (first == '\'' || first == '"')
        {
        if (!parseQuotedString(cur, cur, &next))
            errAbort("missing closing %c in %s", first, string);
        }
    else
        {
        next = skipToSpaces(cur);
        if (next != NULL)
            *next++ = 0;
        }
    item = slNameNew(cur);
    slAddHead(&names, item);
    cur = next;
    }
freeMem(scratch);
/* Items were pushed on the head; restore input order. */
slReverse(&names);
return names;
}
00455 
struct slName *charSepToSlNames(char *string, char c)
/* Convert a list of items separated by character c to an slName
 * list.  The separator after the last item is optional, so for a
 * comma-separated list "a,b,c" and "a,b,c," are equivalent.
 * A NULL or empty string gives an empty (NULL) list. */
{
struct slName *names = NULL, *item;
char *start = string;

while (start != NULL && start[0] != 0)
    {
    char *sep = strchr(start, c);
    if (sep != NULL)
        {
        item = slNameNewN(start, sep - start);
        slAddHead(&names, item);
        start = sep+1;
        continue;
        }
    /* No more separators: the rest of the string is the last item. */
    item = slNameNew(start);
    slAddHead(&names, item);
    break;
    }
slReverse(&names);
return names;
}
00485 
struct slName *commaSepToSlNames(char *commaSep)
/* Split a comma-separated string into an slName list, using
 * charSepToSlNames with ',' as the separator. */
{
struct slName *names = charSepToSlNames(commaSep, ',');
return names;
}
00491 
00492 
void sprintLongWithCommas(char *s, long long l)
/* Print out a long number into s with commas at thousands, millions,
 * etc.  Works for any magnitude and for negative numbers, which get
 * a leading minus sign (for example -1234567 gives "-1,234,567").
 * s must be big enough for the result; with a 64-bit long long
 * that is at most 27 bytes including the terminator. */
{
char digits[24];            /* Plain decimal digits of the magnitude. */
unsigned long long magnitude;
int digitCount, i;

if (l < 0)
    {
    *s++ = '-';
    /* Negate in unsigned arithmetic so the most negative value is ok. */
    magnitude = 0ULL - (unsigned long long)l;
    }
else
    magnitude = (unsigned long long)l;
digitCount = sprintf(digits, "%llu", magnitude);
for (i = 0; i < digitCount; ++i)
    {
    /* A comma goes in front of every digit that has a multiple of
     * three digits left to print, except in front of the first one. */
    if (i > 0 && (digitCount - i) % 3 == 0)
        *s++ = ',';
    *s++ = digits[i];
    }
*s = 0;
}
00524 
void printLongWithCommas(FILE *f, long long l)
/* Write l to f formatted by sprintLongWithCommas, that is with
 * commas at thousands, millions, etc. */
{
char formatted[32];
sprintLongWithCommas(formatted, l);
fputs(formatted, f);
}
00532 
void shuffleArrayOfPointers(void *pointerArray, int arraySize, int shuffleCount)
/* Shuffle an array of arraySize pointers in place, using rand().
 * One pass is made: each slot in turn is swapped with a randomly
 * chosen slot.  Note that shuffleCount is accepted but is not
 * used by this routine. */
{
void **slots = pointerArray;
int pos;

for (pos = 0; pos < arraySize; pos++)
    {
    int other = rand() % arraySize;
    void *held = slots[other];
    slots[other] = slots[pos];
    slots[pos] = held;
    }
}
00547 
00548 void shuffleList(void *pList, int shuffleCount)
00549 /* Randomize order of slList.  Usage:
00550  *     randomizeList(&list)
00551  * where list is a pointer to a structure that
00552  * begins with a next field. */
00553 {
00554 struct slList **pL = (struct slList **)pList;
00555 struct slList *list = *pL;
00556 int count;
00557 count = slCount(list);
00558 if (count > 1)
00559     {
00560     struct slList *el;
00561     struct slList **array;
00562     int i;
00563     array = needLargeMem(count * sizeof(*array));
00564     for (el = list, i=0; el != NULL; el = el->next, i++)
00565         array[i] = el;
00566     for (i=0; i<4; ++i)
00567         shuffleArrayOfPointers(array, count, shuffleCount);
00568     list = NULL;
00569     for (i=0; i<count; ++i)
00570         {
00571         array[i]->next = list;
00572         list = array[i];
00573         }
00574     freeMem(array);
00575     slReverse(&list);
00576     *pL = list;       
00577     }
00578 }
00579 
char *stripCommas(char *position)
/* Make a new string with commas stripped out.  Returns NULL if
 * position is NULL.  The result is allocated by cloneString and
 * the input string is not changed. */
{
char *newPos, *nPtr;

/* Check before cloning so a NULL never reaches cloneString. */
if (position == NULL)
    return NULL;
newPos = nPtr = cloneString(position);
/* Copy every character into the clone, but only step forward in
 * the clone when the character just copied is not a comma. */
while((*nPtr = *position++))
    if (*nPtr != ',')
        nPtr++;

return newPos;
}
00594 
00595 void dotForUserInit(int dotMod)
00596 /* Set how often dotForUser() outputs a dot. */
00597 {
00598 assert(dotMod > 0);
00599 _dotForUserMod = dotMod;
00600 }
00601 
00602 void dotForUser()
00603 /* Write out a dot every _dotForUserMod times this is called. */
00604 {
00605 static int dot = -10;
00606 /* Check to see if dot has been initialized. */
00607 if(dot == - 10)
00608     dot = _dotForUserMod;
00609 
00610 if (--dot <= 0)
00611     {
00612     putc('.', stderr);
00613     fflush(stderr);
00614     dot = _dotForUserMod;
00615     }
00616 }
00617 
void spaceToUnderbar(char *s)
/* Convert each white space character (as judged by isspace) in the
 * zero terminated string s to an underbar, in place. */
{
char c;
while ((c = *s) != 0)
    {
    /* ctype functions need an unsigned char value; a plain char may
     * be negative for bytes above 127. */
    if (isspace((unsigned char)c))
        *s = '_';
    ++s;
    }
}
00629 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2