lib/ra.c

Go to the documentation of this file.
00001 /* Stuff to parse .ra files. Ra files are simple text databases.
00002  * The database is broken into records by blank lines. 
00003  * Each field takes a line.  The name of the field is the first
00004  * word in the line.  The value of the field is the rest of the line.
00005  *
00006  * This file is copyright 2002 Jim Kent, but license is hereby
00007  * granted for all use - public, private or commercial. */
00008 
00009 #include "common.h"
00010 #include "linefile.h"
00011 #include "hash.h"
00012 #include "localmem.h"
00013 #include "ra.h"
00014 
00015 static char const rcsid[] = "$Id: ra.c,v 1.11 2007/03/28 17:13:55 kent Exp $";
00016 
00017 struct hash *raNextRecord(struct lineFile *lf)
00018 /* Return a hash containing next record.   
00019  * Returns NULL at end of file.  freeHash this
00020  * when done.  Note this will free the hash
00021  * keys and values as well, so you'll have to
00022  * cloneMem them if you want them for later. */
00023 {
00024 struct hash *hash = NULL;
00025 char *line, *key, *val;
00026 
00027 /* Skip leading empty lines. */
00028 for (;;)
00029    {
00030    if (!lineFileNext(lf, &line, NULL))
00031        return NULL;
00032    line = skipLeadingSpaces(line);
00033    if (line[0] != 0)
00034        break;
00035    }
00036 lineFileReuse(lf);
00037 for (;;)
00038    {
00039    if (!lineFileNext(lf, &line, NULL))
00040        break;
00041    line = skipLeadingSpaces(line);
00042    if (line[0] == 0)
00043        break;
00044    if (line[0] == '#')
00045        {
00046        if (startsWith("#EOF", line))
00047            return NULL;
00048        else
00049            continue;
00050        }
00051    if (hash == NULL)
00052        hash = newHash(7);
00053    key = nextWord(&line);
00054    val = skipLeadingSpaces(line);
00055    if (line == NULL)
00056        line = "";
00057    val = lmCloneString(hash->lm, val);
00058    hashAdd(hash, key, val);
00059    }
00060 return hash;
00061 }
00062 
00063 struct hash *raFromString(char *string)
00064 /* Return hash of key/value pairs from string.
00065  * As above freeHash this when done. */
00066 {
00067 char *dupe = cloneString(string);
00068 char *s = dupe, *lineEnd;
00069 struct hash *hash = newHash(7);
00070 char *key, *val;
00071 
00072 for (;;)
00073     {
00074     s = skipLeadingSpaces(s);
00075     if (s == NULL || s[0] == 0)
00076         break;
00077     lineEnd = strchr(s, '\n');
00078     if (lineEnd != NULL)
00079         *lineEnd++ = 0;
00080     key = nextWord(&s);
00081     val = skipLeadingSpaces(s);
00082     s = lineEnd;
00083     val = lmCloneString(hash->lm, val);
00084     hashAdd(hash, key, val);
00085     }
00086 freeMem(dupe);
00087 return hash;
00088 }
00089 
00090 char *raFoldInOneRetName(struct lineFile *lf, struct hash *hashOfHash)
00091 /* Fold in one record from ra file into hashOfHash. 
00092  * This will add ra's and ra fields to whatever already
00093  * exists in the hashOfHash,  overriding fields of the
00094  * same name if they exist already. */
00095 {
00096 char *word, *line, *name;
00097 struct hash *ra;
00098 struct hashEl *hel;
00099 
00100 /* Get first nonempty non-comment line and make sure
00101  * it contains name. */
00102 if (!lineFileNextReal(lf, &line))
00103     return NULL;
00104 word = nextWord(&line);
00105 if (!sameString(word, "name"))
00106     errAbort("Expecting 'name' line %d of %s, got %s", 
00107         lf->lineIx, lf->fileName, word);
00108 name = nextWord(&line);
00109 if (name == NULL)
00110     errAbort("Short name field line %d of %s", lf->lineIx, lf->fileName);
00111 
00112 /* Find ra hash associated with name, making up a new
00113  * one if need be. */
00114 if ((ra = hashFindVal(hashOfHash, name)) == NULL)
00115     {
00116     ra = newHash(7);
00117     hashAdd(hashOfHash, name, ra);
00118     hashAdd(ra, "name", lmCloneString(ra->lm, name));
00119     }
00120 
00121 /* Fill in fields of ra hash with data up to next
00122  * blank line or end of file. */
00123 for (;;)
00124     {
00125     if (!lineFileNext(lf, &line, NULL))
00126         break;
00127     line = skipLeadingSpaces(line);
00128     if (line[0] == 0)
00129         break;
00130     if (line[0] == '#')
00131         continue;
00132     word = nextWord(&line);
00133     line = skipLeadingSpaces(line);
00134     if (line == NULL)
00135         line = "";
00136     hel = hashLookup(ra, word);
00137     if (hel == NULL)
00138         hel = hashAdd(ra, word, lmCloneString(ra->lm, line));
00139     else
00140         hel->val = lmCloneString(ra->lm, line);
00141     }
00142 return hashFindVal(ra, "name");
00143 }
00144 
00145 boolean raFoldInOne(struct lineFile *lf, struct hash *hashOfHash)
00146 {
00147 return raFoldInOneRetName(lf, hashOfHash) != NULL;
00148 }
00149 
00150 void raFoldIn(char *fileName, struct hash *hashOfHash)
00151 /* Read ra's in file name and fold them into hashOfHash. 
00152  * This will add ra's and ra fields to whatever already
00153  * exists in the hashOfHash,  overriding fields of the
00154  * same name if they exist already. */
00155 {
00156 struct lineFile *lf = lineFileMayOpen(fileName, TRUE);
00157 if (lf != NULL)
00158     {
00159     struct hash *uniqHash = hashNew(0);
00160     char *name;
00161     while ((name = raFoldInOneRetName(lf, hashOfHash)) != NULL)
00162         {
00163         if (hashLookup(uniqHash, name))
00164             errAbort("%s duplicated in record ending line %d of %s", name, 
00165                 lf->lineIx, lf->fileName);
00166         hashAdd(uniqHash, name, NULL);
00167         }
00168     lineFileClose(&lf);
00169     hashFree(&uniqHash);
00170     }
00171 }
00172 
00173 struct hash *raReadSingle(char *fileName)
00174 /* Read in first ra record in file and return as hash. */
00175 {
00176 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00177 struct hash *hash = raNextRecord(lf);
00178 lineFileClose(&lf);
00179 return hash;
00180 }
00181 
00182 struct hash *raReadAll(char *fileName, char *keyField)
00183 /* Return hash that contains all ra records in file keyed
00184  * by given field, which must exist.  The values of the
00185  * hash are themselves hashes. */
00186 {
00187 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00188 struct hash *bigHash = hashNew(0);
00189 struct hash *hash;
00190 while ((hash = raNextRecord(lf)) != NULL)
00191     {
00192     char *key = hashFindVal(hash, keyField);
00193     if (key == NULL)
00194         errAbort("Couldn't find key field %s line %d of %s",
00195                 keyField, lf->lineIx, lf->fileName);
00196     hashAdd(bigHash, key, hash);
00197     }
00198 lineFileClose(&lf);
00199 return bigHash;
00200 }

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2