lib/snof.c

Go to the documentation of this file.
00001 /* Snof.c Sorted Name Offset File.
00002  * This accesses a file of name/offset pairs that are sorted by
00003  * name.  Does a binary search of file to find the offset given name.
00004  * Most typically this is used to do a quick lookup given an index file. 
00005  *
00006  * This file is copyright 2002 Jim Kent, but license is hereby
00007  * granted for all use - public, private or commercial. */
00008 
00009 #include "common.h"
00010 #include "snof.h"
00011 #include "snofmake.h"
00012 
00013 static char const rcsid[] = "$Id: snof.c,v 1.4 2003/05/06 07:33:44 kate Exp $";
00014 
00015 void snofClose(struct snof **pSnof)
00016 /* Close down the index file. */
00017 {
00018 struct snof *snof = *pSnof;
00019 if (snof != NULL)
00020     {
00021     if (snof->file != NULL)
00022         fclose(snof->file);
00023     if (snof->first != NULL)
00024         freeMem(snof->first);
00025     freeMem(snof);
00026     *pSnof = NULL;
00027     }
00028 }
00029 
00030 static boolean makeSnofName(char *snofName, char *rootName)
00031 /* Figure out whether it's .xi or .ix by looking at
00032  * signature.  Usually just need to open a file to
00033  * do this once per program run. */
00034 {
00035 static char *suffixes[2] = {".ix", ".xi"};
00036 static char *suff = NULL;
00037 int sigBuf[4];
00038 FILE *f;
00039 int i;
00040 
00041 if (suff == NULL)
00042     {
00043     for (i=0; i<ArraySize(suffixes); ++i)
00044         {
00045         sprintf(snofName, "%s%s", rootName, suffixes[i]);
00046         if ((f = fopen(snofName, "rb")) != NULL)
00047             {
00048             if ((fread(sigBuf, sizeof(sigBuf), 1, f)) == 1)
00049                 {
00050                 if (isSnofSig(&sigBuf))
00051                     {
00052                     suff = suffixes[i];
00053                     }
00054                 }
00055             fclose(f);
00056             }
00057         if (suff != NULL)
00058             break;
00059         }
00060     }
00061 if (suff == NULL)
00062     return FALSE;
00063 sprintf(snofName, "%s%s", rootName, suff);
00064 return TRUE;
00065 }
00066 
00067 struct snof *snofOpen(char *indexName)
00068 /* Open up the index file.  Returns NULL if there's any problem. */
00069 {
00070 struct snof *snof;
00071 int sigBuf[4];
00072 FILE *f;
00073 char fileName[512];
00074 
00075 if (!makeSnofName(fileName, indexName))
00076     return NULL;
00077 if ((snof = needMem(sizeof(*snof))) == NULL)
00078     return NULL;
00079 
00080 if ((snof->file = f = fopen(fileName, "rb")) == NULL)
00081     {
00082     freeMem(snof);
00083     return NULL;
00084     }
00085 if ((fread(sigBuf, sizeof(sigBuf), 1, f)) != 1)
00086     {
00087     snofClose(&snof);
00088     return NULL;
00089     }
00090 if (!isSnofSig(&sigBuf))
00091     {
00092     snofClose(&snof);
00093     return NULL;
00094     }
00095 if ((fread(&snof->maxNameSize, sizeof(snof->maxNameSize), 1, f)) != 1)
00096     {
00097     snofClose(&snof);
00098     return NULL;
00099     }
00100 snof->headSize = ftell(f);
00101 snof->itemSize = snof->maxNameSize + sizeof(unsigned);
00102 if ((snof->first = needMem(5*snof->itemSize)) == NULL)
00103     {
00104     snofClose(&snof);
00105     return NULL;
00106     }
00107 snof->last = snof->first + snof->itemSize;
00108 snof->less = snof->last + snof->itemSize;
00109 snof->mid = snof->less + snof->itemSize;
00110 snof->more = snof->mid + snof->itemSize;
00111 
00112 if (fread(snof->first, snof->itemSize, 1, f) != 1)
00113     {
00114     snofClose(&snof);
00115     return NULL;
00116     }
00117 fseek(f, -snof->itemSize, SEEK_END);
00118 snof->endIx = (ftell(f)-snof->headSize)/snof->itemSize;
00119 if (fread(snof->last, snof->itemSize, 1, f) != 1)
00120     {
00121     snofClose(&snof);
00122     return NULL;
00123     }
00124 return snof;
00125 }
00126 
00127 struct snof *snofMustOpen(char *indexName)
00128 /* Open up index file or die. */
00129 {
00130 struct snof *snof = snofOpen(indexName);
00131 if (snof == NULL)
00132     errAbort("Couldn't open index file %s", indexName);
00133 return snof;
00134 }
00135 
00136 static long retrieveOffset(char *item, int itemSize)
00137 {
00138 unsigned offset;
00139 memcpy(&offset, item+itemSize-sizeof(offset), sizeof(offset));
00140 return offset;
00141 }
00142 
00143 static int snofCmp(char *prefix, char *name, int maxSize, boolean isPrefix)
00144 {
00145 if (isPrefix)
00146     return memcmp(prefix, name, maxSize);
00147 else
00148     return strcmp(prefix, name);
00149 }
00150 
00151 static boolean snofSearch(struct snof *snof, char *name, int nameSize, 
00152     boolean isPrefix, int *pIx, char **pNameOffset)
00153 /* Find index of name by binary search.
00154  * Returns FALSE if no such name in the index file. */
00155  {
00156 char *startName, *endName, *midName;
00157 int startIx, endIx, midIx;
00158 int cmp;
00159 int itemSize = snof->itemSize;
00160 FILE *f = snof->file;
00161 int headSize = snof->headSize;
00162 
00163 /* Truncate name size if necessary. */
00164 if (nameSize > snof->maxNameSize)
00165     nameSize = snof->maxNameSize;
00166 
00167 /* Set up endpoints of binary search */
00168 startName = snof->less;
00169 memcpy(startName, snof->first, itemSize);
00170 endName = snof->more;
00171 memcpy(endName, snof->last, itemSize);
00172 midName = snof->mid;
00173 
00174 startIx = 0;
00175 endIx = snof->endIx;
00176 
00177 /* Check for degenerate initial case */
00178 if (snofCmp(name, startName, nameSize, isPrefix) == 0)
00179     {
00180     *pIx = startIx;
00181     *pNameOffset = startName;
00182     return TRUE;
00183     }
00184 if (snofCmp(name, endName, nameSize, isPrefix) == 0)
00185     {
00186     *pIx = endIx;
00187     *pNameOffset =  endName;
00188     return TRUE;
00189     }
00190 
00191 /* Do binary search. */
00192 for (;;)
00193     {
00194     midIx = (startIx + endIx ) / 2;
00195     if (midIx == startIx || midIx == endIx)
00196         {
00197         *pIx = -1;
00198         return FALSE;
00199         }
00200     fseek(f, headSize + midIx*itemSize, SEEK_SET);
00201     if (fread(midName, itemSize, 1, f) < 1)
00202         {
00203         *pIx = 0;
00204         return FALSE;
00205         }
00206     cmp = snofCmp(name, midName, nameSize, isPrefix);
00207     if (cmp == 0)
00208         {
00209         *pIx = midIx;
00210         *pNameOffset = midName;
00211         return TRUE;
00212         }
00213     else if (cmp > 0)
00214         {
00215         memcpy(startName, midName, itemSize);
00216         startIx = midIx;
00217         }
00218     else
00219         {
00220         memcpy(endName, midName, itemSize);
00221         endIx = midIx;
00222         }
00223     }
00224 }
00225 
00226 boolean snofFindOffset(struct snof *snof, char *name, long *pOffset)
00227 /* Find offset corresponding with name.  Returns FALSE if no such name
00228  * in the index file. */
00229 {
00230 char *nameOffset;
00231 int matchIx;
00232 if (!snofSearch(snof, name, strlen(name), FALSE,  &matchIx, &nameOffset))
00233     {
00234     *pOffset = matchIx; /* Pass along error code such as it is. */
00235     return FALSE;
00236     }
00237 *pOffset = retrieveOffset(nameOffset, snof->itemSize);
00238 return TRUE;
00239 }
00240 
00241 boolean snofFindFirstStartingWith(struct snof *snof, char *prefix, int prefixSize,
00242     int *pSnofIx)
00243 /* Find first index in snof file whose name begins with prefix. */
00244 {
00245 char *nameOffset;
00246 int matchIx;
00247 if (!snofSearch(snof, prefix, prefixSize, TRUE,  &matchIx, &nameOffset))
00248     {
00249     *pSnofIx = matchIx; /* Pass along error code such as it is. */
00250     return FALSE;
00251     }
00252 while (--matchIx >= 0)
00253     {
00254     nameOffset = snofNameAtIx(snof, matchIx);
00255     if (snofCmp(prefix, nameOffset, prefixSize, TRUE) != 0)
00256         break;
00257     }
00258 ++matchIx;
00259 *pSnofIx = matchIx;
00260 return TRUE;
00261 }
00262 
00263 int snofElementCount(struct snof *snof)
00264 /* How many names are in snof file? */
00265 {
00266 return snof->endIx + 1;
00267 }
00268 
00269 long snofOffsetAtIx(struct snof *snof, int ix)
00270 /* The offset of a particular index in file. */
00271 {
00272 char *nameOffset = snofNameAtIx(snof, ix);
00273 return retrieveOffset(nameOffset, snof->itemSize);
00274 }
00275 
00276 char *snofNameAtIx(struct snof *snof, int ix)
00277 /* The name at a particular index in file.  (This will be overwritten by
00278  * later calls to snof system. Strdup if you want to keep it.)
00279  */
00280 {
00281 fseek(snof->file, snof->headSize + ix*snof->itemSize, SEEK_SET);
00282 fread(snof->mid, snof->itemSize, 1, snof->file);
00283 return snof->mid;
00284 }
00285 
00286 void snofNameOffsetAtIx(struct snof *snof, int ix, char **pName, long *pOffset)
00287 /* Get both name and offset for an index. */
00288 {
00289 char *nameOffset = snofNameAtIx(snof, ix);
00290 *pName = nameOffset;
00291 *pOffset = retrieveOffset(nameOffset, snof->itemSize);
00292 }
00293 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2