#include "common.h"
#include "localmem.h"
#include "sig.h"
#include "fof.h"

Include dependency graph for fof.c:

Go to the source code of this file.
Data Structures | |
| struct | fofRecord |
| struct | fof |
| struct | fofRecList |
Functions | |
| static void | readStringZ (FILE *f, char *s, int maxLen) |
| fof * | fofOpen (char *fofName, char *fofDir) |
| void | fofClose (struct fof **pFof) |
| int | fofElementCount (struct fof *fof) |
| static void | fofRecToPos (struct fof *fof, int ix, struct fofRecord *rec, struct fofPos *pos) |
| static int | fofCmp (char *prefix, char *name, int maxSize, boolean isPrefix) |
| static boolean | fofSearch (struct fof *fof, char *name, int nameSize, boolean isPrefix, struct fofPos *retPos) |
| boolean | fofFindFirst (struct fof *fof, char *prefix, int prefixSize, struct fofPos *retPos) |
| boolean | fofFind (struct fof *fof, char *name, struct fofPos *retPos) |
| void * | fofFetch (struct fof *fof, char *name, int *retSize) |
| char * | fofFetchString (struct fof *fof, char *name, int *retSize) |
| static int | cmpOnKey (const void *va, const void *vb) |
| static int | cmpOnFilePos (const void *va, const void *vb) |
| static void | elFromRec (struct fof *fof, struct fofRecord *rec, struct fofBatch *el) |
| fofBatch * | fofBatchFind (struct fof *fof, struct fofBatch *list) |
| static int | cmpRecList (const void *va, const void *vb) |
| fofRecList * | newFofRecEl (int fileIx, long offset, long size, char *name, int nameLen) |
| void | fofMake (char *inFiles[], int inCount, char *outName, boolean(*readHeader)(FILE *inFile, void *data), boolean(*nextRecord)(FILE *inFile, void *data, char **rName, int *rNameLen), void *data, boolean dupeOk) |
Variables | |
| static char const | rcsid [] = "$Id: fof.c,v 1.7 2006/03/10 17:43:36 angie Exp $" |
| lm * | localMem |
| static bits16 | maxNameSize |
| static int cmpOnFilePos | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 333 of file fof.c.
References fofBatch::f, and fofBatch::offset.
00336 { 00337 const struct fofBatch *a = *((struct fofBatch **)va); 00338 const struct fofBatch *b = *((struct fofBatch **)vb); 00339 int dif = a->f - b->f; 00340 if (dif == 0) 00341 dif = a->offset - b->offset; 00342 return dif; 00343 }
| static int cmpOnKey | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 324 of file fof.c.
References fofBatch::key.
Referenced by fofBatchFind().
00327 { 00328 const struct fofBatch *a = *((struct fofBatch **)va); 00329 const struct fofBatch *b = *((struct fofBatch **)vb); 00330 return strcmp(a->key, b->key); 00331 }
Here is the caller graph for this function:

| static int cmpRecList | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 410 of file fof.c.
References bits32, fofRecord::fileIx, fofRecord::name, fofRecord::offset, fofRecList::rec, and UBYTE.
Referenced by fofMake().
00413 { 00414 const struct fofRecList *a = *((struct fofRecList **)va); 00415 const struct fofRecList *b = *((struct fofRecList **)vb); 00416 int dif; 00417 dif = strcmp(a->rec.name, b->rec.name); 00418 if (dif == 0) 00419 { 00420 UBYTE ao = a->rec.fileIx; 00421 UBYTE bo = b->rec.fileIx; 00422 if (ao < bo) 00423 dif = -1; 00424 else if (ao > bo) 00425 dif = 1; 00426 else 00427 { 00428 bits32 ao = a->rec.offset; 00429 bits32 bo = b->rec.offset; 00430 if (ao < bo) 00431 dif = -1; 00432 else if (ao == bo) 00433 dif = 0; 00434 else 00435 dif = 1; 00436 } 00437 } 00438 return dif; 00439 }
Here is the caller graph for this function:

Definition at line 345 of file fof.c.
References fofBatch::f, fofRecord::fileIx, fof::fileNames, fof::files, mustOpen(), fofRecord::offset, fofBatch::offset, fofRecord::size, and fofBatch::size.
Referenced by fofBatchFind().
00347 { 00348 FILE *ff; 00349 int fileIx = rec->fileIx; 00350 if ((ff = fof->files[fileIx]) != NULL) 00351 { 00352 el->f = ff; 00353 } 00354 else 00355 { 00356 el->f = fof->files[fileIx] = mustOpen(fof->fileNames[fileIx], "rb"); 00357 } 00358 el->offset = rec->offset; 00359 el->size = rec->size; 00360 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 362 of file fof.c.
References cmpOnKey(), elFromRec(), fof::f, fofBatch::f, fof::headSize, fof::itemSize, fofBatch::key, mustRead(), fofRecord::name, fofBatch::next, fof::rec, sameString, SEEK_SET, and slSort().
00364 { 00365 struct fofBatch *el; 00366 FILE *f = fof->f; 00367 struct fofRecord *rec = fof->rec; 00368 int itemSize = fof->itemSize; 00369 char *lastKey = ""; 00370 00371 slSort(&list, cmpOnKey); 00372 fseek(f, fof->headSize, SEEK_SET); 00373 for (el = list; el != NULL; el = el->next) 00374 { 00375 char *key = el->key; 00376 if (sameString(key, lastKey)) 00377 { 00378 elFromRec(fof, rec, el); 00379 } 00380 else 00381 { 00382 for (;;) 00383 { 00384 mustRead(f, rec, itemSize); 00385 if (sameString(key, rec->name)) 00386 { 00387 elFromRec(fof, rec, el); 00388 lastKey = key; 00389 break; 00390 } 00391 } 00392 } 00393 } 00394 slSort(&list, cmpOnFilePos); 00395 return list; 00396 }
Here is the call graph for this function:

| void fofClose | ( | struct fof ** | pFof | ) |
Definition at line 129 of file fof.c.
References carefulClose(), fof::f, fof::fileCount, fof::fileNames, fof::files, fof::first, freeMem(), freez(), fof::last, fof::name, and fof::rec.
00131 { 00132 struct fof *fof = *pFof; 00133 if (fof != NULL) 00134 { 00135 int fileCount = fof->fileCount; 00136 int i; 00137 00138 for (i=0; i<fileCount; ++i) 00139 { 00140 freeMem(fof->fileNames[i]); 00141 carefulClose(&fof->files[i]); 00142 } 00143 freeMem(fof->name); 00144 freeMem(fof->fileNames); 00145 freeMem(fof->files); 00146 freeMem(fof->rec); 00147 freeMem(fof->first); 00148 freeMem(fof->last); 00149 carefulClose(&fof->f); 00150 freez(pFof); 00151 } 00152 }
Here is the call graph for this function:

| static int fofCmp | ( | char * | prefix, | |
| char * | name, | |||
| int | maxSize, | |||
| boolean | isPrefix | |||
| ) | [static] |
Definition at line 182 of file fof.c.
Referenced by fofSearch().
00184 { 00185 if (isPrefix) 00186 return memcmp(prefix, name, maxSize); 00187 else 00188 return strcmp(prefix, name); 00189 }
Here is the caller graph for this function:

| int fofElementCount | ( | struct fof * | fof | ) |
Definition at line 154 of file fof.c.
References fof::endIx.
00156 { 00157 return fof->endIx + 1; 00158 }
| void* fofFetch | ( | struct fof * | fof, | |
| char * | name, | |||
| int * | retSize | |||
| ) |
Definition at line 288 of file fof.c.
References errAbort(), fofPos::f, fofFind(), mustRead(), fof::name, needLargeMem(), fofPos::offset, SEEK_SET, and fofPos::size.
00292 { 00293 struct fofPos pos; 00294 void *s; 00295 00296 if (!fofFind(fof, name, &pos)) 00297 errAbort("Couldn't find %s in %s", name, fof->name); 00298 s = needLargeMem(pos.size); 00299 fseek(pos.f, pos.offset, SEEK_SET); 00300 mustRead(pos.f, s, pos.size); 00301 *retSize = pos.size; 00302 return s; 00303 }
Here is the call graph for this function:

| char* fofFetchString | ( | struct fof * | fof, | |
| char * | name, | |||
| int * | retSize | |||
| ) |
Definition at line 305 of file fof.c.
References errAbort(), fofPos::f, fofFind(), mustRead(), fof::name, needLargeMem(), fofPos::offset, SEEK_SET, and fofPos::size.
00309 { 00310 struct fofPos pos; 00311 char *s; 00312 00313 if (!fofFind(fof, name, &pos)) 00314 errAbort("Couldn't find %s in %s", name, fof->name); 00315 s = needLargeMem(pos.size+1); 00316 fseek(pos.f, pos.offset, SEEK_SET); 00317 mustRead(pos.f, s, pos.size); 00318 s[pos.size] = 0; 00319 *retSize = pos.size; 00320 return s; 00321 }
Here is the call graph for this function:

Definition at line 281 of file fof.c.
References FALSE, and fofSearch().
Referenced by fofFetch(), and fofFetchString().
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 248 of file fof.c.
References fof::f, FALSE, fofRecToPos(), fofSearch(), fof::headSize, fofPos::indexIx, fof::itemSize, mustRead(), fofRecord::name, fof::rec, SEEK_SET, and TRUE.
00251 { 00252 int ix; 00253 struct fofRecord *rec = fof->rec; 00254 FILE *f = fof->f; 00255 int itemSize = fof->itemSize; 00256 int headSize = fof->headSize; 00257 00258 /* Find some record that starts with prefix. */ 00259 if (!fofSearch(fof, prefix, prefixSize, TRUE, retPos)) 00260 return FALSE; 00261 00262 /* Backtrack until find one that doesn't start with prefix. */ 00263 ix = retPos->indexIx; 00264 while (--ix >= 0) 00265 { 00266 fseek(f, headSize + ix*itemSize, SEEK_SET); 00267 mustRead(f, rec, itemSize); 00268 if (memcmp(prefix, rec->name, prefixSize) != 0) 00269 break; 00270 } 00271 00272 /* Return the first record that does start with prefix. */ 00273 ++ix; 00274 fseek(f, headSize + ix*itemSize, SEEK_SET); 00275 mustRead(f, rec, itemSize); 00276 fofRecToPos(fof, ix, rec, retPos); 00277 return TRUE; 00278 }
Here is the call graph for this function:

| void fofMake | ( | char * | inFiles[], | |
| int | inCount, | |||
| char * | outName, | |||
| boolean(*)(FILE *inFile, void *data) | readHeader, | |||
| boolean(*)(FILE *inFile, void *data, char **rName, int *rNameLen) | nextRecord, | |||
| void * | data, | |||
| boolean | dupeOk | |||
| ) |
Definition at line 458 of file fof.c.
References bits16, bits32, cmpRecList(), errnoAbort(), fofSig, lmCleanup(), lmInit(), localMem, maxNameSize, mustOpen(), mustWrite(), name, newFofRecEl(), fofRecList::next, sameString, slAddHead, slCount(), slSort(), UBYTE, warn(), and writeOne.
00463 : 00464 * inFiles - List of files that you're indexing with header read and verified. 00465 * inCount - Size of file list. 00466 * outName - name of index file to create 00467 * readHeader - function that sets up file to read first record. May be NULL. 00468 * nextRecord - function that reads next record in file you're indexing 00469 * and returns the name of that record. Returns FALSE at 00470 * end of file. Can set *rNameLen to zero you want indexer 00471 * to ignore the record. 00472 * data - void pointer passed through to nextRecord. 00473 * dupeOk - set to TRUE if you want dupes to not cause squawking 00474 */ 00475 { 00476 FILE *out; 00477 bits32 sig = fofSig; 00478 bits32 elCount = 0; 00479 bits16 fileCount = inCount; 00480 struct fofRecList *recList = NULL, *rl; 00481 int i, fileIx, itemSize; 00482 char *lastName = ""; 00483 int maxMod = 10000; 00484 00485 /* Initialize. */ 00486 localMem = lmInit(0); 00487 maxNameSize = 0; 00488 00489 /* Read in all records and sort by name. */ 00490 for (fileIx = 0; fileIx<inCount; ++fileIx) 00491 { 00492 char *inName = inFiles[fileIx]; 00493 FILE *in = mustOpen(inName, "rb"); 00494 bits32 start, end; 00495 char *name; 00496 int nameLen; 00497 int mod = maxMod; 00498 00499 printf("Processing %s\n", inName); 00500 if (readHeader) 00501 readHeader(in, data); 00502 start = ftell(in); 00503 while (nextRecord(in, data, &name, &nameLen)) 00504 { 00505 if (--mod == 0) 00506 { 00507 putc('.', stdout); 00508 fflush(stdout); 00509 mod = maxMod; 00510 } 00511 end = ftell(in); 00512 if (nameLen > 0) 00513 { 00514 rl = newFofRecEl(fileIx, start, end-start, name, nameLen); 00515 slAddHead(&recList, rl); 00516 } 00517 start = end; 00518 } 00519 fclose(in); 00520 printf("\n"); 00521 } 00522 00523 printf("sorting\n"); 00524 slSort(&recList, cmpRecList); 00525 00526 /* Count up names. 
*/ 00527 if (dupeOk) 00528 elCount = slCount(recList); 00529 else 00530 { 00531 lastName = ""; 00532 for (rl = recList; rl != NULL; rl = rl->next) 00533 { 00534 char *name = rl->rec.name; 00535 if (!sameString(name, lastName)) 00536 { 00537 ++elCount; 00538 lastName = name; 00539 } 00540 } 00541 } 00542 00543 /* Write out index file. */ 00544 printf("Writing %s\n", outName); 00545 out = mustOpen(outName, "wb"); 00546 writeOne(out, sig); 00547 writeOne(out, elCount); 00548 writeOne(out, fileCount); 00549 writeOne(out, maxNameSize); 00550 itemSize = sizeof(bits32) +sizeof(bits32) + sizeof(UBYTE) + maxNameSize; 00551 for (i=0; i<inCount; ++i) 00552 { 00553 char *name = inFiles[i]; 00554 int len = strlen(name)+1; 00555 mustWrite(out, name, len); 00556 } 00557 lastName = ""; 00558 for (rl = recList; rl != NULL; rl = rl->next) 00559 { 00560 if (!dupeOk) 00561 { 00562 char *name = rl->rec.name; 00563 if (sameString(name, lastName)) 00564 { 00565 warn("Duplicate %s only saving first.", name); 00566 continue; 00567 } 00568 else 00569 lastName = name; 00570 } 00571 writeOne(out, rl->rec.offset); 00572 writeOne(out, rl->rec.size); 00573 writeOne(out, rl->rec.fileIx); 00574 mustWrite(out, rl->rec.name, maxNameSize); 00575 } 00576 if (fclose(out) != 0) 00577 errnoAbort("fclose failed"); 00578 /* Clean up. */ 00579 lmCleanup(&localMem); 00580 }
Here is the call graph for this function:

| struct fof* fofOpen | ( | char * | fofName, | |
| char * | fofDir | |||
| ) | [read] |
Definition at line 62 of file fof.c.
References AllocVar, bits16, bits32, cloneString(), fof::endIx, errAbort(), fof::f, fof::fileCount, fof::fileNames, fof::files, fof::first, fofSig, fof::headSize, fof::itemSize, fof::last, fof::maxNameSize, maxNameSize, mustOpen(), mustRead(), mustReadOne, fof::name, needMem(), readStringZ(), fof::rec, fof::relDir, safef(), SEEK_SET, UBYTE, and warn().
00065 { 00066 bits32 sig, elCount; 00067 bits16 fileCount, maxNameSize; 00068 FILE *f; 00069 char nameBuf[512]; 00070 char pathBuf[512]; 00071 struct fof *fof; 00072 int i; 00073 00074 /* Handle directory either being something or NULL, and 00075 * either ending with a slash or not. */ 00076 if (fofDir == NULL) 00077 { 00078 fofDir = ""; 00079 } 00080 00081 /* Open file, verify signature. */ 00082 safef(pathBuf, sizeof(pathBuf), "%s%s", fofDir, fofName); 00083 f = mustOpen(pathBuf, "rb"); 00084 mustReadOne(f, sig); 00085 if (sig != fofSig) 00086 errAbort("Bad signature on %s", pathBuf); 00087 mustReadOne(f, elCount); 00088 00089 /* Read size info and allocate basic fof structure. */ 00090 mustReadOne(f, fileCount); 00091 if (fileCount > 12) 00092 warn("%d files indexed in fof %s!?", fileCount, fofName); 00093 mustReadOne(f, maxNameSize); 00094 if (maxNameSize > 40) 00095 warn("%d maxName size in fof %s!?", maxNameSize, fofName); 00096 AllocVar(fof); 00097 fof->name = cloneString(fofName); 00098 fof->relDir = cloneString(fofDir); 00099 fof->fileNames = needMem(fileCount * sizeof(fof->fileNames[0])); 00100 fof->files = needMem(fileCount * sizeof(fof->files[0])); 00101 fof->f = f; 00102 fof->fileCount = fileCount; 00103 fof->endIx = elCount-1; 00104 fof->maxNameSize = maxNameSize; 00105 fof->itemSize = sizeof(bits32) +sizeof(bits32) + sizeof(UBYTE) + maxNameSize; 00106 fof->rec = needMem(sizeof(*fof->rec) + maxNameSize); 00107 fof->first = needMem(sizeof(*fof->rec) + maxNameSize); 00108 fof->last = needMem(sizeof(*fof->rec) + maxNameSize); 00109 00110 /* Read in names of files being indexed and figure header size. */ 00111 for (i=0; i<fileCount; ++i) 00112 { 00113 readStringZ(f, nameBuf, sizeof(nameBuf)); 00114 safef(pathBuf, sizeof(pathBuf), "%s%s", fofDir, nameBuf); 00115 fof->fileNames[i] = cloneString(pathBuf); 00116 } 00117 fof->headSize = ftell(f); 00118 00119 /* Read in first and last records. 
*/ 00120 mustRead(f, fof->first, fof->itemSize); 00121 fseek(f, fof->headSize + fof->endIx*fof->itemSize, SEEK_SET); 00122 mustRead(f, fof->last, fof->itemSize); 00123 00124 /* All done (files will be opened as needed, not here). */ 00125 return fof; 00126 }
Here is the call graph for this function:

| static void fofRecToPos | ( | struct fof * | fof, | |
| int | ix, | |||
| struct fofRecord * | rec, | |||
| struct fofPos * | pos | |||
| ) | [static] |
Definition at line 160 of file fof.c.
References fofPos::f, fof::f, fofRecord::fileIx, fofPos::fileName, fof::fileNames, fof::files, fofPos::indexIx, mustOpen(), fofRecord::offset, fofPos::offset, fof::rec, fofRecord::size, and fofPos::size.
Referenced by fofFindFirst(), and fofSearch().
00162 { 00163 int fileIx = rec->fileIx; 00164 FILE *f; 00165 00166 pos->indexIx = ix; 00167 pos->offset = rec->offset; 00168 pos->size = rec->size; 00169 pos->fileName = fof->fileNames[fileIx]; 00170 if ((f = fof->files[fileIx]) != NULL) 00171 { 00172 pos->f = f; 00173 } 00174 else 00175 { 00176 pos->f = fof->files[fileIx] = mustOpen(fof->fileNames[fileIx], "rb"); 00177 } 00178 return; 00179 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean fofSearch | ( | struct fof * | fof, | |
| char * | name, | |||
| int | nameSize, | |||
| boolean | isPrefix, | |||
| struct fofPos * | retPos | |||
| ) | [static] |
Definition at line 191 of file fof.c.
References fof::endIx, fof::f, FALSE, fof::first, fofCmp(), fofRecToPos(), fof::headSize, fof::itemSize, fof::last, fof::maxNameSize, mustRead(), fofRecord::name, fof::rec, SEEK_SET, and TRUE.
Referenced by fofFind(), and fofFindFirst().
00195 { 00196 struct fofRecord *rec = fof->rec; 00197 int startIx, endIx, midIx; 00198 int cmp; 00199 int itemSize = fof->itemSize; 00200 FILE *f = fof->f; 00201 int headSize = fof->headSize; 00202 00203 /* Truncate name size if necessary. */ 00204 if (nameSize > fof->maxNameSize) 00205 nameSize = fof->maxNameSize; 00206 00207 /* Set up endpoints of binary search */ 00208 startIx = 0; 00209 endIx = fof->endIx; 00210 00211 /* Check for degenerate initial case */ 00212 if (fofCmp(name, fof->first->name, nameSize, isPrefix) == 0) 00213 { 00214 fofRecToPos(fof, startIx, fof->first, retPos); 00215 return TRUE; 00216 } 00217 if (fofCmp(name, fof->last->name, nameSize, isPrefix) == 0) 00218 { 00219 fofRecToPos(fof, endIx, fof->last, retPos); 00220 return TRUE; 00221 } 00222 00223 /* Do binary search. */ 00224 for (;;) 00225 { 00226 midIx = (startIx + endIx ) / 2; 00227 if (midIx == startIx || midIx == endIx) 00228 return FALSE; 00229 fseek(f, headSize + midIx*itemSize, SEEK_SET); 00230 mustRead(f, rec, itemSize); 00231 cmp = fofCmp(name, rec->name, nameSize, isPrefix); 00232 if (cmp == 0) 00233 { 00234 fofRecToPos(fof, midIx, rec, retPos); 00235 return TRUE; 00236 } 00237 else if (cmp > 0) 00238 { 00239 startIx = midIx; 00240 } 00241 else 00242 { 00243 endIx = midIx; 00244 } 00245 } 00246 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct fofRecList* newFofRecEl | ( | int | fileIx, | |
| long | offset, | |||
| long | size, | |||
| char * | name, | |||
| int | nameLen | |||
| ) | [read] |
Definition at line 443 of file fof.c.
References lmAlloc(), localMem, and maxNameSize.
Referenced by fofMake().
00445 { 00446 struct fofRecList *fr; 00447 00448 if (maxNameSize < nameLen) 00449 maxNameSize = nameLen; 00450 fr = lmAlloc(localMem, sizeof(*fr) + nameLen); 00451 fr->rec.offset = offset; 00452 fr->rec.size = size; 00453 fr->rec.fileIx = fileIx; 00454 memcpy(fr->rec.name, name, nameLen); 00455 return fr; 00456 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void readStringZ | ( | FILE * | f, | |
| char * | s, | |||
| int | maxLen | |||
| ) | [static] |
Definition at line 41 of file fof.c.
References errAbort().
Referenced by fofOpen().
static void readStringZ(FILE *f, char *s, int maxLen)
/* Read a zero terminated string from f into s, a buffer of maxLen bytes.
 * Calls errAbort if the file ends before the terminator is seen or if the
 * string does not fit. */
{
    int limit = maxLen - 1;     /* room for zero tag. */
    int count = 0;
    int ch;

    while (count < limit)
        {
        ch = fgetc(f);
        if (ch == EOF)
            errAbort("Unexpected EOF in readStringZ");
        if (ch == 0)
            break;
        s[count] = ch;
        ++count;
        }
    /* Reaching the limit means no terminator was seen within the buffer. */
    if (count == limit)
        errAbort("String too long in readStringZ");
    s[count] = 0;
}
Here is the call graph for this function:

Here is the caller graph for this function:

bits16 maxNameSize [static] |
char const rcsid[] = "$Id: fof.c,v 1.7 2006/03/10 17:43:36 angie Exp $" [static] |
1.5.2