00001
00002
00003
00004
00005 #include "common.h"
00006 #include "localmem.h"
00007 #include "sig.h"
00008 #include "fof.h"
00009
00010 static char const rcsid[] = "$Id: fof.c,v 1.7 2006/03/10 17:43:36 angie Exp $";
00011
00012 struct fofRecord
00013
00014 {
00015 bits32 offset;
00016 bits32 size;
00017 UBYTE fileIx;
00018 char name[1];
00019 };
00020
00021 struct fof
00022
00023
00024 {
00025 char *name;
00026 char *relDir;
00027 char **fileNames;
00028 FILE **files;
00029 FILE *f;
00030 int fileCount;
00031 int endIx;
00032 int maxNameSize;
00033 int itemSize;
00034 long headSize;
00035 struct fofRecord *rec;
00036 struct fofRecord *first;
00037 struct fofRecord *last;
00038 };
00039
00040
00041 static void readStringZ(FILE *f, char *s, int maxLen)
00042
00043 {
00044 int c;
00045 int ix;
00046 maxLen -= 1;
00047
00048 for (ix = 0; ix <maxLen; ++ix)
00049 {
00050 if ((c = fgetc(f)) == EOF)
00051 errAbort("Unexpected EOF in readStringZ");
00052 if (c == 0)
00053 break;
00054 s[ix] = c;
00055 }
00056 if (ix == maxLen)
00057 errAbort("String too long in readStringZ");
00058 s[ix] = 0;
00059 }
00060
00061
00062 struct fof *fofOpen(char *fofName, char *fofDir)
00063
00064
00065 {
00066 bits32 sig, elCount;
00067 bits16 fileCount, maxNameSize;
00068 FILE *f;
00069 char nameBuf[512];
00070 char pathBuf[512];
00071 struct fof *fof;
00072 int i;
00073
00074
00075
00076 if (fofDir == NULL)
00077 {
00078 fofDir = "";
00079 }
00080
00081
00082 safef(pathBuf, sizeof(pathBuf), "%s%s", fofDir, fofName);
00083 f = mustOpen(pathBuf, "rb");
00084 mustReadOne(f, sig);
00085 if (sig != fofSig)
00086 errAbort("Bad signature on %s", pathBuf);
00087 mustReadOne(f, elCount);
00088
00089
00090 mustReadOne(f, fileCount);
00091 if (fileCount > 12)
00092 warn("%d files indexed in fof %s!?", fileCount, fofName);
00093 mustReadOne(f, maxNameSize);
00094 if (maxNameSize > 40)
00095 warn("%d maxName size in fof %s!?", maxNameSize, fofName);
00096 AllocVar(fof);
00097 fof->name = cloneString(fofName);
00098 fof->relDir = cloneString(fofDir);
00099 fof->fileNames = needMem(fileCount * sizeof(fof->fileNames[0]));
00100 fof->files = needMem(fileCount * sizeof(fof->files[0]));
00101 fof->f = f;
00102 fof->fileCount = fileCount;
00103 fof->endIx = elCount-1;
00104 fof->maxNameSize = maxNameSize;
00105 fof->itemSize = sizeof(bits32) +sizeof(bits32) + sizeof(UBYTE) + maxNameSize;
00106 fof->rec = needMem(sizeof(*fof->rec) + maxNameSize);
00107 fof->first = needMem(sizeof(*fof->rec) + maxNameSize);
00108 fof->last = needMem(sizeof(*fof->rec) + maxNameSize);
00109
00110
00111 for (i=0; i<fileCount; ++i)
00112 {
00113 readStringZ(f, nameBuf, sizeof(nameBuf));
00114 safef(pathBuf, sizeof(pathBuf), "%s%s", fofDir, nameBuf);
00115 fof->fileNames[i] = cloneString(pathBuf);
00116 }
00117 fof->headSize = ftell(f);
00118
00119
00120 mustRead(f, fof->first, fof->itemSize);
00121 fseek(f, fof->headSize + fof->endIx*fof->itemSize, SEEK_SET);
00122 mustRead(f, fof->last, fof->itemSize);
00123
00124
00125 return fof;
00126 }
00127
00128
00129 void fofClose(struct fof **pFof)
00130
00131 {
00132 struct fof *fof = *pFof;
00133 if (fof != NULL)
00134 {
00135 int fileCount = fof->fileCount;
00136 int i;
00137
00138 for (i=0; i<fileCount; ++i)
00139 {
00140 freeMem(fof->fileNames[i]);
00141 carefulClose(&fof->files[i]);
00142 }
00143 freeMem(fof->name);
00144 freeMem(fof->fileNames);
00145 freeMem(fof->files);
00146 freeMem(fof->rec);
00147 freeMem(fof->first);
00148 freeMem(fof->last);
00149 carefulClose(&fof->f);
00150 freez(pFof);
00151 }
00152 }
00153
00154 int fofElementCount(struct fof *fof)
00155
00156 {
00157 return fof->endIx + 1;
00158 }
00159
00160 static void fofRecToPos(struct fof *fof, int ix, struct fofRecord *rec, struct fofPos *pos)
00161
00162 {
00163 int fileIx = rec->fileIx;
00164 FILE *f;
00165
00166 pos->indexIx = ix;
00167 pos->offset = rec->offset;
00168 pos->size = rec->size;
00169 pos->fileName = fof->fileNames[fileIx];
00170 if ((f = fof->files[fileIx]) != NULL)
00171 {
00172 pos->f = f;
00173 }
00174 else
00175 {
00176 pos->f = fof->files[fileIx] = mustOpen(fof->fileNames[fileIx], "rb");
00177 }
00178 return;
00179 }
00180
00181
00182 static int fofCmp(char *prefix, char *name, int maxSize, boolean isPrefix)
00183
00184 {
00185 if (isPrefix)
00186 return memcmp(prefix, name, maxSize);
00187 else
00188 return strcmp(prefix, name);
00189 }
00190
00191 static boolean fofSearch(struct fof *fof, char *name, int nameSize,
00192 boolean isPrefix, struct fofPos *retPos)
00193
00194
00195 {
00196 struct fofRecord *rec = fof->rec;
00197 int startIx, endIx, midIx;
00198 int cmp;
00199 int itemSize = fof->itemSize;
00200 FILE *f = fof->f;
00201 int headSize = fof->headSize;
00202
00203
00204 if (nameSize > fof->maxNameSize)
00205 nameSize = fof->maxNameSize;
00206
00207
00208 startIx = 0;
00209 endIx = fof->endIx;
00210
00211
00212 if (fofCmp(name, fof->first->name, nameSize, isPrefix) == 0)
00213 {
00214 fofRecToPos(fof, startIx, fof->first, retPos);
00215 return TRUE;
00216 }
00217 if (fofCmp(name, fof->last->name, nameSize, isPrefix) == 0)
00218 {
00219 fofRecToPos(fof, endIx, fof->last, retPos);
00220 return TRUE;
00221 }
00222
00223
00224 for (;;)
00225 {
00226 midIx = (startIx + endIx ) / 2;
00227 if (midIx == startIx || midIx == endIx)
00228 return FALSE;
00229 fseek(f, headSize + midIx*itemSize, SEEK_SET);
00230 mustRead(f, rec, itemSize);
00231 cmp = fofCmp(name, rec->name, nameSize, isPrefix);
00232 if (cmp == 0)
00233 {
00234 fofRecToPos(fof, midIx, rec, retPos);
00235 return TRUE;
00236 }
00237 else if (cmp > 0)
00238 {
00239 startIx = midIx;
00240 }
00241 else
00242 {
00243 endIx = midIx;
00244 }
00245 }
00246 }
00247
00248 boolean fofFindFirst(struct fof *fof, char *prefix,
00249 int prefixSize, struct fofPos *retPos)
00250
00251 {
00252 int ix;
00253 struct fofRecord *rec = fof->rec;
00254 FILE *f = fof->f;
00255 int itemSize = fof->itemSize;
00256 int headSize = fof->headSize;
00257
00258
00259 if (!fofSearch(fof, prefix, prefixSize, TRUE, retPos))
00260 return FALSE;
00261
00262
00263 ix = retPos->indexIx;
00264 while (--ix >= 0)
00265 {
00266 fseek(f, headSize + ix*itemSize, SEEK_SET);
00267 mustRead(f, rec, itemSize);
00268 if (memcmp(prefix, rec->name, prefixSize) != 0)
00269 break;
00270 }
00271
00272
00273 ++ix;
00274 fseek(f, headSize + ix*itemSize, SEEK_SET);
00275 mustRead(f, rec, itemSize);
00276 fofRecToPos(fof, ix, rec, retPos);
00277 return TRUE;
00278 }
00279
00280
00281 boolean fofFind(struct fof *fof, char *name, struct fofPos *retPos)
00282
00283
00284 {
00285 return fofSearch(fof, name, strlen(name), FALSE, retPos);
00286 }
00287
00288 void *fofFetch(struct fof *fof, char *name, int *retSize)
00289
00290
00291
00292 {
00293 struct fofPos pos;
00294 void *s;
00295
00296 if (!fofFind(fof, name, &pos))
00297 errAbort("Couldn't find %s in %s", name, fof->name);
00298 s = needLargeMem(pos.size);
00299 fseek(pos.f, pos.offset, SEEK_SET);
00300 mustRead(pos.f, s, pos.size);
00301 *retSize = pos.size;
00302 return s;
00303 }
00304
00305 char *fofFetchString(struct fof *fof, char *name, int *retSize)
00306
00307
00308
00309 {
00310 struct fofPos pos;
00311 char *s;
00312
00313 if (!fofFind(fof, name, &pos))
00314 errAbort("Couldn't find %s in %s", name, fof->name);
00315 s = needLargeMem(pos.size+1);
00316 fseek(pos.f, pos.offset, SEEK_SET);
00317 mustRead(pos.f, s, pos.size);
00318 s[pos.size] = 0;
00319 *retSize = pos.size;
00320 return s;
00321 }
00322
00323
00324 static int cmpOnKey(const void *va, const void *vb)
00325
00326
00327 {
00328 const struct fofBatch *a = *((struct fofBatch **)va);
00329 const struct fofBatch *b = *((struct fofBatch **)vb);
00330 return strcmp(a->key, b->key);
00331 }
00332
00333 static int cmpOnFilePos(const void *va, const void *vb)
00334
00335
00336 {
00337 const struct fofBatch *a = *((struct fofBatch **)va);
00338 const struct fofBatch *b = *((struct fofBatch **)vb);
00339 int dif = a->f - b->f;
00340 if (dif == 0)
00341 dif = a->offset - b->offset;
00342 return dif;
00343 }
00344
00345 static void elFromRec(struct fof *fof, struct fofRecord *rec, struct fofBatch *el)
00346
00347 {
00348 FILE *ff;
00349 int fileIx = rec->fileIx;
00350 if ((ff = fof->files[fileIx]) != NULL)
00351 {
00352 el->f = ff;
00353 }
00354 else
00355 {
00356 el->f = fof->files[fileIx] = mustOpen(fof->fileNames[fileIx], "rb");
00357 }
00358 el->offset = rec->offset;
00359 el->size = rec->size;
00360 }
00361
00362 struct fofBatch *fofBatchFind(struct fof *fof, struct fofBatch *list)
00363
00364 {
00365 struct fofBatch *el;
00366 FILE *f = fof->f;
00367 struct fofRecord *rec = fof->rec;
00368 int itemSize = fof->itemSize;
00369 char *lastKey = "";
00370
00371 slSort(&list, cmpOnKey);
00372 fseek(f, fof->headSize, SEEK_SET);
00373 for (el = list; el != NULL; el = el->next)
00374 {
00375 char *key = el->key;
00376 if (sameString(key, lastKey))
00377 {
00378 elFromRec(fof, rec, el);
00379 }
00380 else
00381 {
00382 for (;;)
00383 {
00384 mustRead(f, rec, itemSize);
00385 if (sameString(key, rec->name))
00386 {
00387 elFromRec(fof, rec, el);
00388 lastKey = key;
00389 break;
00390 }
00391 }
00392 }
00393 }
00394 slSort(&list, cmpOnFilePos);
00395 return list;
00396 }
00397
00398
00399
00400
00401 struct lm *localMem;
00402
00403 struct fofRecList
00404
00405 {
00406 struct fofRecList *next;
00407 struct fofRecord rec;
00408 };
00409
00410 static int cmpRecList(const void *va, const void *vb)
00411
00412
00413 {
00414 const struct fofRecList *a = *((struct fofRecList **)va);
00415 const struct fofRecList *b = *((struct fofRecList **)vb);
00416 int dif;
00417 dif = strcmp(a->rec.name, b->rec.name);
00418 if (dif == 0)
00419 {
00420 UBYTE ao = a->rec.fileIx;
00421 UBYTE bo = b->rec.fileIx;
00422 if (ao < bo)
00423 dif = -1;
00424 else if (ao > bo)
00425 dif = 1;
00426 else
00427 {
00428 bits32 ao = a->rec.offset;
00429 bits32 bo = b->rec.offset;
00430 if (ao < bo)
00431 dif = -1;
00432 else if (ao == bo)
00433 dif = 0;
00434 else
00435 dif = 1;
00436 }
00437 }
00438 return dif;
00439 }
00440
00441 static bits16 maxNameSize;
00442
00443 struct fofRecList *newFofRecEl(int fileIx, long offset, long size, char *name, int nameLen)
00444
00445 {
00446 struct fofRecList *fr;
00447
00448 if (maxNameSize < nameLen)
00449 maxNameSize = nameLen;
00450 fr = lmAlloc(localMem, sizeof(*fr) + nameLen);
00451 fr->rec.offset = offset;
00452 fr->rec.size = size;
00453 fr->rec.fileIx = fileIx;
00454 memcpy(fr->rec.name, name, nameLen);
00455 return fr;
00456 }
00457
00458 void fofMake(char *inFiles[], int inCount, char *outName,
00459 boolean (*readHeader)(FILE *inFile, void *data),
00460 boolean (*nextRecord)(FILE *inFile, void *data, char **rName, int *rNameLen),
00461 void *data, boolean dupeOk)
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475 {
00476 FILE *out;
00477 bits32 sig = fofSig;
00478 bits32 elCount = 0;
00479 bits16 fileCount = inCount;
00480 struct fofRecList *recList = NULL, *rl;
00481 int i, fileIx, itemSize;
00482 char *lastName = "";
00483 int maxMod = 10000;
00484
00485
00486 localMem = lmInit(0);
00487 maxNameSize = 0;
00488
00489
00490 for (fileIx = 0; fileIx<inCount; ++fileIx)
00491 {
00492 char *inName = inFiles[fileIx];
00493 FILE *in = mustOpen(inName, "rb");
00494 bits32 start, end;
00495 char *name;
00496 int nameLen;
00497 int mod = maxMod;
00498
00499 printf("Processing %s\n", inName);
00500 if (readHeader)
00501 readHeader(in, data);
00502 start = ftell(in);
00503 while (nextRecord(in, data, &name, &nameLen))
00504 {
00505 if (--mod == 0)
00506 {
00507 putc('.', stdout);
00508 fflush(stdout);
00509 mod = maxMod;
00510 }
00511 end = ftell(in);
00512 if (nameLen > 0)
00513 {
00514 rl = newFofRecEl(fileIx, start, end-start, name, nameLen);
00515 slAddHead(&recList, rl);
00516 }
00517 start = end;
00518 }
00519 fclose(in);
00520 printf("\n");
00521 }
00522
00523 printf("sorting\n");
00524 slSort(&recList, cmpRecList);
00525
00526
00527 if (dupeOk)
00528 elCount = slCount(recList);
00529 else
00530 {
00531 lastName = "";
00532 for (rl = recList; rl != NULL; rl = rl->next)
00533 {
00534 char *name = rl->rec.name;
00535 if (!sameString(name, lastName))
00536 {
00537 ++elCount;
00538 lastName = name;
00539 }
00540 }
00541 }
00542
00543
00544 printf("Writing %s\n", outName);
00545 out = mustOpen(outName, "wb");
00546 writeOne(out, sig);
00547 writeOne(out, elCount);
00548 writeOne(out, fileCount);
00549 writeOne(out, maxNameSize);
00550 itemSize = sizeof(bits32) +sizeof(bits32) + sizeof(UBYTE) + maxNameSize;
00551 for (i=0; i<inCount; ++i)
00552 {
00553 char *name = inFiles[i];
00554 int len = strlen(name)+1;
00555 mustWrite(out, name, len);
00556 }
00557 lastName = "";
00558 for (rl = recList; rl != NULL; rl = rl->next)
00559 {
00560 if (!dupeOk)
00561 {
00562 char *name = rl->rec.name;
00563 if (sameString(name, lastName))
00564 {
00565 warn("Duplicate %s only saving first.", name);
00566 continue;
00567 }
00568 else
00569 lastName = name;
00570 }
00571 writeOne(out, rl->rec.offset);
00572 writeOne(out, rl->rec.size);
00573 writeOne(out, rl->rec.fileIx);
00574 mustWrite(out, rl->rec.name, maxNameSize);
00575 }
00576 if (fclose(out) != 0)
00577 errnoAbort("fclose failed");
00578
00579 lmCleanup(&localMem);
00580 }
00581