00001
00002
00003
00004
00005
#include "common.h"
#include <unistd.h>
#include <stdint.h>
#include "portable.h"
#include "localmem.h"
#include "hash.h"
#include "obscure.h"
#include "linefile.h"
00013
/* Revision identification string for this source file. */
static const char rcsid[] = "$Id: obscure.c,v 1.44 2007/03/02 00:45:01 kent Exp $";

/* Countdown reload value used by dotForUser(); replaced by dotForUserInit(). */
static int _dotForUserMod = 100;
00016
/*
 * Increment the counter stored in fileName and return the new value.
 *
 * The counter lives on disk as the raw bytes of a long.  When the file can be
 * opened for update its contents are read and the stream is rewound so the
 * new value overwrites the old one; otherwise the file is created and the
 * count starts from zero.  If neither open succeeds the incremented value is
 * still returned but nothing is stored.  Aborts through errnoAbort() when the
 * write or the close fails.
 */
long incCounterFile(char *fileName)
{
    long val = 0;
    FILE *f = fopen(fileName, "r+b");

    if (f != NULL) {
        /* An empty file reads nothing and leaves val at its initial zero. */
        fread(&val, sizeof(val), 1, f);
        rewind(f);
    } else {
        f = fopen(fileName, "wb");
    }
    ++val;
    if (f != NULL) {
        /* Report a failed write the same way a failed close is reported. */
        if (fwrite(&val, sizeof(val), 1, f) != 1)
            errnoAbort("fwrite failed");
        if (fclose(f) != 0)
            errnoAbort("fclose failed");
    }
    return val;
}
00040
/*
 * Return how many binary digits are needed to write x, i.e. the position of
 * its highest set bit counting from one.  Returns 0 when x is 0.
 */
int digitsBaseTwo(unsigned long x)
{
    int bitCount;

    for (bitCount = 0; x != 0; x >>= 1)
        ++bitCount;
    return bitCount;
}
00052
/*
 * Return the number of characters needed to print x in base ten, counting a
 * leading minus sign for negative values.
 *
 * The magnitude is taken in unsigned arithmetic so that the most negative
 * int, which has no positive counterpart, is handled without signed overflow.
 */
int digitsBaseTen(int x)
{
    int digCount = 1;
    unsigned int magnitude;

    if (x < 0) {
        digCount = 2;                       /* the digit plus the '-' */
        magnitude = 0u - (unsigned int)x;   /* well-defined negation */
    } else {
        magnitude = (unsigned int)x;
    }
    while (magnitude >= 10) {
        digCount += 1;
        magnitude /= 10;
    }
    return digCount;
}
00069
/*
 * Write size bytes from buf into the named file in a single call.
 * The file is opened with mode "w" through mustOpen(), written with
 * mustWrite() and closed with carefulClose().
 */
void writeGulp(char *file, char *buf, int size)
{
    FILE *out = mustOpen(file, "w");

    mustWrite(out, buf, size);
    carefulClose(&out);
}
00077
/*
 * Read the whole of fileName into a newly allocated buffer.
 *
 * retBuf  - receives the buffer; one extra byte is allocated and set to zero
 *           so the contents can be used as a C string.
 * retSize - if not NULL, receives the size reported by fileSize() (the zero
 *           terminator is not counted).
 */
void readInGulp(char *fileName, char **retBuf, size_t *retSize)
{
    size_t byteCount = (size_t)fileSize(fileName);
    FILE *in = mustOpen(fileName, "rb");
    char *contents = needLargeMem(byteCount + 1);

    *retBuf = contents;
    mustRead(in, contents, byteCount);
    contents[byteCount] = 0;
    fclose(in);
    if (retSize != NULL)
        *retSize = byteCount;
}
00091
/*
 * Read fileName into memory and split it into whitespace separated words.
 *
 * retWords     - receives an array of pointers to the words, or NULL when the
 *                file holds no words.
 * retWordCount - receives the number of words.
 * retBuf       - receives the buffer holding the file contents; it is the
 *                same buffer that was handed to chopByWhite().
 */
void readAllWords(char *fileName, char ***retWords, int *retWordCount, char **retBuf)
{
    char *contents = NULL;
    char **wordArray = NULL;
    int total;

    /* The byte count is not needed here, so no size pointer is passed. */
    readInGulp(fileName, &contents, NULL);

    /* First pass only counts; second pass fills the array. */
    total = chopByWhite(contents, NULL, 0);
    if (total != 0) {
        wordArray = needMem(total * sizeof(wordArray[0]));
        chopByWhite(contents, wordArray, total);
    }
    *retWords = wordArray;
    *retWordCount = total;
    *retBuf = contents;
}
00112
00113 int countWordsInFile(char *fileName)
00114
00115 {
00116 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00117 char *line;
00118 int wordCount = 0;
00119 while (lineFileNext(lf, &line, NULL))
00120 wordCount += chopByWhite(line, NULL, 0);
00121 lineFileClose(&lf);
00122 return wordCount;
00123 }
00124
00125 struct hash *hashWordsInFile(char *fileName, int hashSize)
00126
00127 {
00128 struct hash *hash = newHash(hashSize);
00129 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00130 char *line, *word;
00131 while (lineFileNext(lf, &line, NULL))
00132 {
00133 while ((word = nextWord(&line)) != NULL)
00134 hashAdd(hash, word, NULL);
00135 }
00136 lineFileClose(&lf);
00137 return hash;
00138 }
00139
00140 struct hash *hashNameIntFile(char *fileName)
00141
00142
00143 {
00144 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00145 char *row[2];
00146 struct hash *hash = hashNew(16);
00147 while (lineFileRow(lf, row))
00148 hashAddInt(hash, row[0], lineFileNeedNum(lf, row, 1));
00149 lineFileClose(&lf);
00150 return hash;
00151 }
00152
00153 struct hash *hashTwoColumnFile(char *fileName)
00154
00155 {
00156 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00157 char *row[2];
00158 struct hash *hash = hashNew(16);
00159 while (lineFileRow(lf, row))
00160 {
00161 char *name = row[0];
00162 char *value = lmCloneString(hash->lm, row[1]);
00163 hashAdd(hash, name, value);
00164 }
00165 lineFileClose(&lf);
00166 return hash;
00167 }
00168
00169 struct slName *readAllLines(char *fileName)
00170
00171 {
00172 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00173 struct slName *list = NULL, *el;
00174 char *line;
00175
00176 while (lineFileNext(lf, &line, NULL))
00177 {
00178 el = newSlName(line);
00179 slAddHead(&list, el);
00180 }
00181 slReverse(&list);
00182 return list;
00183 }
00184
/*
 * Copy the file named source to the file named dest.
 *
 * dest is created (or truncated) with creat() using mode 0777.  Data moves
 * through a 64 KiB buffer.  Aborts through errAbort()/errnoAbort() if either
 * file cannot be opened, or if a read, write or the final close of dest
 * fails.
 */
void copyFile(char *source, char *dest)
{
    int bufSize = 64*1024;
    char *buf = needMem(bufSize);
    ssize_t bytesRead;
    int s, d;

    s = open(source, O_RDONLY);
    if (s < 0)
        errAbort("Couldn't open %s. %s\n", source, strerror(errno));
    d = creat(dest, 0777);
    if (d < 0) {
        /* close() may overwrite errno, so capture the creat() failure first. */
        int savedErrno = errno;

        close(s);
        errAbort("Couldn't open %s. %s\n", dest, strerror(savedErrno));
    }
    while ((bytesRead = read(s, buf, bufSize)) > 0) {
        ssize_t written = 0;

        /* write() may accept fewer bytes than asked; keep going until the
         * whole chunk is out. */
        while (written < bytesRead) {
            ssize_t n = write(d, buf + written, bytesRead - written);

            /* Zero is treated as failure too, so the loop always ends. */
            if (n <= 0)
                errAbort("Write error on %s. %s\n", dest, strerror(errno));
            written += n;
        }
    }
    /* A negative count is a read failure, not end of file. */
    if (bytesRead < 0)
        errAbort("Read error on %s. %s\n", source, strerror(errno));
    close(s);
    if (close(d) != 0)
        errnoAbort("close failed");
    freeMem(buf);
}
00212
/*
 * Copy every remaining character of inFh to outFh, one character at a time.
 * After the input is exhausted both streams are checked with ferror() and
 * errnoAbort() is called if either one reports an error.
 */
void copyOpenFile(FILE *inFh, FILE *outFh)
{
    int ch = fgetc(inFh);

    while (ch != EOF) {
        fputc(ch, outFh);
        ch = fgetc(inFh);
    }
    if (ferror(inFh))
        errnoAbort("file read failed");
    if (ferror(outFh))
        errnoAbort("file write failed");
}
00224
/*
 * Copy from file descriptor s to file descriptor d until read() reports end
 * of file or an error.  Neither descriptor is closed.
 *
 * Short writes are completed by calling write() again with the remainder.
 * The function has no way to report failure; if a write fails, copying
 * stops so that later data is not appended after a gap.
 */
void cpFile(int s, int d)
{
    int bufSize = 64*1024;
    char *buf = needMem(bufSize);
    ssize_t readSize;
    int writeFailed = 0;

    while (!writeFailed && (readSize = read(s, buf, bufSize)) > 0) {
        ssize_t done = 0;

        while (done < readSize) {
            ssize_t n = write(d, buf + done, readSize - done);

            if (n <= 0) {
                writeFailed = 1;
                break;
            }
            done += n;
        }
    }
    freeMem(buf);
}
00241
/*
 * Store an int in a pointer-sized value so it can travel through interfaces
 * that carry a void pointer.  The result must only be turned back into an
 * integer (see ptToInt); it is not a pointer to any object.
 *
 * The conversion goes through intptr_t instead of offsetting a null pointer,
 * which the C standard does not define.
 */
void *intToPt(int i)
{
    return (void *)(intptr_t)i;
}
00249
/*
 * Recover an int that was packed into a pointer value (see intToPt).
 *
 * The conversion goes through intptr_t instead of subtracting a null
 * pointer, which the C standard does not define.
 */
int ptToInt(void *pt)
{
    return (int)(intptr_t)pt;
}
00257
/*
 * Store a size_t in a pointer-sized value so it can travel through
 * interfaces that carry a void pointer.  The result must only be turned back
 * into an integer (see ptToSizet); it is not a pointer to any object.
 *
 * The conversion goes through uintptr_t instead of offsetting a null
 * pointer, which the C standard does not define.
 */
void *sizetToPt(size_t i)
{
    return (void *)(uintptr_t)i;
}
00265
/*
 * Recover a size_t that was packed into a pointer value (see sizetToPt).
 *
 * The conversion goes through uintptr_t instead of subtracting a null
 * pointer, which the C standard does not define.
 */
size_t ptToSizet(void *pt)
{
    return (size_t)(uintptr_t)pt;
}
00273
00274 boolean parseQuotedString( char *in, char *out, char **retNext)
00275
00276
00277
00278
00279 {
00280 char c, *s = in;
00281 int quoteChar = *s++;
00282 boolean escaped = FALSE;
00283
00284 for (;;)
00285 {
00286 c = *s++;
00287 if (c == 0)
00288 {
00289 warn("Unmatched %c", quoteChar);
00290 return FALSE;
00291 }
00292 if (escaped)
00293 {
00294 if (c == '\\' || c == quoteChar)
00295 *out++ = c;
00296 else
00297 {
00298 *out++ = '\\';
00299 *out++ = c;
00300 }
00301 escaped = FALSE;
00302 }
00303 else
00304 {
00305 if (c == '\\')
00306 escaped = TRUE;
00307 else if (c == quoteChar)
00308 break;
00309 else
00310 *out++ = c;
00311 }
00312 }
00313 *out = 0;
00314 if (retNext != NULL)
00315 *retNext = s;
00316 return TRUE;
00317 }
00318
/*
 * Return the next word from *pLine, treating text inside single or double
 * quotes as one word.
 *
 * Leading spaces are skipped first.  If the word starts with a quote it is
 * parsed in place by parseQuotedString(), which also moves *pLine past the
 * closing quote; otherwise the work is handed to nextWord().  Returns NULL
 * when nothing is left or when a quote is not closed.
 */
char *nextQuotedWord(char **pLine)
{
    char *start = skipLeadingSpaces(*pLine);
    char first;

    if (start == NULL || start[0] == 0)
        return NULL;
    first = start[0];
    if (first != '"' && first != '\'')
        return nextWord(pLine);
    if (!parseQuotedString(start, start, pLine))
        return NULL;
    return start;
}
00341
/*
 * Copy the string in to out, writing the escape character in front of every
 * occurrence of toEscape.  The terminating zero is copied as well.  out must
 * have room for the input plus one extra byte per escaped character.
 */
void escCopy(char *in, char *out, char toEscape, char escape)
{
    char *src = in;
    char *dst = out;
    char ch;

    do {
        ch = *src++;
        if (ch == toEscape)
            *dst++ = escape;
        *dst++ = ch;
    } while (ch != 0);
}
00357
/*
 * Return a newly allocated copy of in with a backslash placed in front of
 * every occurrence of toEscape.  The buffer comes from needMem().
 */
char *makeEscapedString(char *in, char toEscape)
{
    /* One extra byte is needed for each character that gets escaped. */
    int escapedLen = strlen(in) + countChars(in, toEscape);
    char *result = needMem(escapedLen + 1);

    escCopy(in, result, toEscape, '\\');
    return result;
}
00368
/*
 * Return a newly allocated copy of in wrapped in quoteChar, with a backslash
 * placed in front of every quoteChar that occurs inside the text.  The
 * buffer comes from needMem().
 */
char *makeQuotedString(char *in, char quoteChar)
{
    /* Two quotes, the text, and one backslash per embedded quote. */
    int newSize = 2 + strlen(in) + countChars(in, quoteChar);
    char *out = needMem(newSize+1);

    out[0] = quoteChar;
    escCopy(in, out+1, quoteChar, '\\');
    /* The closing quote overwrites the terminator written by escCopy(), so
     * terminate explicitly rather than relying on how the buffer was filled. */
    out[newSize-1] = quoteChar;
    out[newSize] = 0;
    return out;
}
00380
/*
 * Parse a line of the form  var1=val1 var2='quoted val' ...  into a hash.
 *
 * line   - the text to parse; it is not modified, a private copy is cut up.
 * lineIx - line number, used only in error messages.
 *
 * Each variable must begin with a letter and be followed by '='.  A value
 * that starts with a single or double quote is read with
 * parseQuotedString(); any other value runs up to the next space.  Values
 * stored in the hash are fresh copies made with cloneString().  Malformed
 * input is reported through errAbort().
 */
struct hash *hashVarLine(char *line, int lineIx)
{
    char *dupe = cloneString(line);
    char *s = dupe, c;
    char *var, *val;
    struct hash *hash = newHash(8);

    for (;;) {
        if ((var = skipLeadingSpaces(s)) == NULL)
            break;
        if ((c = *var) == 0)
            break;
        /* <ctype.h> functions need an unsigned char value; a plain char may
         * be negative. */
        if (!isalpha((unsigned char)c))
            errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var);
        val = strchr(var, '=');
        if (val == NULL) {
            errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line);
        }
        *val++ = 0;     /* ends the variable name and steps onto the value */
        c = *val;
        if (c == '\'' || c == '"') {
            if (!parseQuotedString(val, val, &s))
                errAbort("line %d of input: missing closing %c", lineIx, c);
        } else {
            s = skipToSpaces(val);
            if (s != NULL)
                *s++ = 0;
        }
        hashAdd(hash, var, cloneString(val));
    }
    freez(&dupe);
    return hash;
}
00421
/*
 * Split string into a list of slName elements, one per word, in the order
 * they appear.  Words are separated by spaces; a word that begins with a
 * single or double quote is read with parseQuotedString() so it may contain
 * spaces.  The input is not modified; a private copy is cut up and freed.
 * An unclosed quote is reported through errAbort().
 */
struct slName *stringToSlNames(char *string)
{
    struct slName *names = NULL;
    struct slName *item;
    char *copy = cloneString(string);
    char *cursor = copy;

    while ((cursor = skipLeadingSpaces(cursor)) != NULL && *cursor != 0) {
        char first = *cursor;
        char *rest;

        if (first == '\'' || first == '"') {
            if (!parseQuotedString(cursor, cursor, &rest))
                errAbort("missing closing %c in %s", first, string);
        } else {
            rest = skipToSpaces(cursor);
            if (rest != NULL)
                *rest++ = 0;
        }
        item = slNameNew(cursor);
        slAddHead(&names, item);
        cursor = rest;
    }
    freeMem(copy);
    /* Items were pushed on the head, so put them back in input order. */
    slReverse(&names);
    return names;
}
00455
/*
 * Split string at every occurrence of c and return the pieces as a list of
 * slName elements in input order.  A NULL or empty string gives an empty
 * list, and nothing is added for the empty text after a trailing separator.
 * The input string is not modified.
 */
struct slName *charSepToSlNames(char *string, char c)
{
    struct slName *names = NULL;
    struct slName *item;
    char *start = string;

    while (start != NULL && *start != 0) {
        char *sep = strchr(start, c);

        if (sep != NULL) {
            item = slNameNewN(start, sep - start);
            slAddHead(&names, item);
            start = sep + 1;
            continue;
        }
        /* No further separator: the rest of the string is the last piece. */
        item = slNameNew(start);
        slAddHead(&names, item);
        break;
    }
    slReverse(&names);
    return names;
}
00485
/*
 * Split a comma separated string into a list of slName elements.
 * Shorthand for charSepToSlNames() with ',' as the separator.
 */
struct slName *commaSepToSlNames(char *commaSep)
{
    struct slName *names = charSepToSlNames(commaSep, ',');

    return names;
}
00491
00492
/*
 * Write l into s in decimal with a comma between every group of three
 * digits, for example 1234567 becomes "1,234,567".
 *
 * Every long long value is handled, including negative numbers and values
 * of a trillion or more.  s must have room for the longest result, which is
 * 27 bytes for a 64-bit long long including the terminator.
 */
void sprintLongWithCommas(char *s, long long l)
{
    char digits[24];
    /* Negate in unsigned arithmetic so the most negative value is safe. */
    unsigned long long magnitude =
        (l < 0) ? 0ULL - (unsigned long long)l : (unsigned long long)l;
    int len = snprintf(digits, sizeof digits, "%llu", magnitude);
    int i;

    if (l < 0)
        *s++ = '-';
    for (i = 0; i < len; ++i) {
        /* A comma goes wherever a multiple of three digits remains. */
        if (i > 0 && (len - i) % 3 == 0)
            *s++ = ',';
        *s++ = digits[i];
    }
    *s = 0;
}
00524
/*
 * Write l to f in decimal with comma separators, formatted by
 * sprintLongWithCommas() into a 32 byte local buffer.
 */
void printLongWithCommas(FILE *f, long long l)
{
    char text[32];

    sprintLongWithCommas(text, l);
    fputs(text, f);
}
00532
/*
 * Shuffle an array of pointers in place.  Each position in turn is swapped
 * with a position chosen by rand() from the whole array.
 *
 * pointerArray - start of an array of arraySize pointers.
 * shuffleCount - not used by this implementation; one pass is always made.
 */
void shuffleArrayOfPointers(void *pointerArray, int arraySize, int shuffleCount)
{
    void **slots = pointerArray;
    int pos;

    (void)shuffleCount;
    for (pos = 0; pos < arraySize; pos++) {
        int other = rand() % arraySize;
        void *held = slots[pos];

        slots[pos] = slots[other];
        slots[other] = held;
    }
}
00547
00548 void shuffleList(void *pList, int shuffleCount)
00549
00550
00551
00552
00553 {
00554 struct slList **pL = (struct slList **)pList;
00555 struct slList *list = *pL;
00556 int count;
00557 count = slCount(list);
00558 if (count > 1)
00559 {
00560 struct slList *el;
00561 struct slList **array;
00562 int i;
00563 array = needLargeMem(count * sizeof(*array));
00564 for (el = list, i=0; el != NULL; el = el->next, i++)
00565 array[i] = el;
00566 for (i=0; i<4; ++i)
00567 shuffleArrayOfPointers(array, count, shuffleCount);
00568 list = NULL;
00569 for (i=0; i<count; ++i)
00570 {
00571 array[i]->next = list;
00572 list = array[i];
00573 }
00574 freeMem(array);
00575 slReverse(&list);
00576 *pL = list;
00577 }
00578 }
00579
/*
 * Return a newly allocated copy of position with every comma removed, or
 * NULL when position is NULL.  The input string is not modified.
 */
char *stripCommas(char *position)
{
    char *newPos;
    char *nPtr;

    /* Check before cloning so a NULL input never reaches cloneString(). */
    if (position == NULL)
        return NULL;
    newPos = cloneString(position);
    nPtr = newPos;

    /* Copy each character over the clone, advancing the write position only
     * for characters that are kept; the loop ends once the terminating zero
     * has been copied. */
    while ((*nPtr = *position++) != 0) {
        if (*nPtr != ',')
            nPtr++;
    }
    return newPos;
}
00594
00595 void dotForUserInit(int dotMod)
00596
00597 {
00598 assert(dotMod > 0);
00599 _dotForUserMod = dotMod;
00600 }
00601
00602 void dotForUser()
00603
00604 {
00605 static int dot = -10;
00606
00607 if(dot == - 10)
00608 dot = _dotForUserMod;
00609
00610 if (--dot <= 0)
00611 {
00612 putc('.', stderr);
00613 fflush(stderr);
00614 dot = _dotForUserMod;
00615 }
00616 }
00617
/*
 * Replace every whitespace character in s, as classified by isspace(), with
 * an underscore.  The string is modified in place.
 */
void spaceToUnderbar(char *s)
{
    for (; *s != 0; ++s) {
        /* <ctype.h> functions need an unsigned char value; a plain char may
         * be negative for bytes above 127. */
        if (isspace((unsigned char)*s))
            *s = '_';
    }
}
00629