lib/dtdParse.c File Reference

#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "dystring.h"
#include "obscure.h"
#include "dtdParse.h"

Include dependency graph for dtdParse.c:

Go to the source code of this file.

Functions

static void syntaxError (struct lineFile *lf)
static char * needNextWord (char **pLine, struct lineFile *lf)
void needQuotedString (char *in, char *out, struct lineFile *lf, char **retNext)
static boolean isAllUpper (char *s)
static boolean isAllLower (char *s)
static char * mixedCaseName (char *prefix, char *orig)
static struct hash * initialEntityHash ()
static struct dtdElement * parseElement (char *prefix, char *textField, char *line, struct hash *elHash, struct lineFile *lf)
static void parseAttribute (char *line, char *textField, struct hash *elHash, struct lineFile *lf)
void parseEntity (struct hash *entityHash, struct hash *predefEntityHash, char *line, struct lineFile *lf)
static void fixupChildRefs (struct dtdElement *elList, struct hash *elHash, char *fileName)
static char * eatComment (struct lineFile *lf, char *line)
static void expandEntities (char *s, struct hash *entityHash, struct lineFile *lf, struct dyString *dest)
static char * dtdxTag (struct lineFile *lf, struct hash *entityHash, struct dyString *buf)
void dtdParse (char *fileName, char *prefix, char *textField, struct dtdElement **retList, struct hash **retHash)
void dtdElementDump (struct dtdElement *el, FILE *f)


Function Documentation

void dtdElementDump ( struct dtdElement *  el,
FILE *  f 
)

Definition at line 439 of file dtdParse.c.

References dtdElement::attributes, dtdElement::children, dtdElChild::copyCode, dtdElChild::isOr, dtdElement::mixedCaseName, dtdAttribute::name, dtdElChild::name, dtdElement::name, dtdAttribute::next, dtdElChild::next, dtdAttribute::required, dtdElement::textType, dtdAttribute::type, and dtdAttribute::usual.

00441 {
00442 struct dtdElChild *ec;
00443 struct dtdAttribute *att;
00444 fprintf(f, "%s %s (", el->name, el->mixedCaseName);
00445 for (ec = el->children; ec != NULL; ec = ec->next)
00446     {
00447     fprintf(f, "%s", ec->name);
00448     if (ec->copyCode != '1')
00449         fprintf(f, "%c", ec->copyCode);
00450     if (ec->isOr)
00451         fprintf(f, " (isOr)");
00452     if (ec->next != NULL)
00453         fprintf(f, ", ");
00454     }
00455 fprintf(f, ")");
00456 if (el->textType != NULL)
00457     fprintf(f, " (%s)", el->textType);
00458 fprintf(f, "\n");
00459 for (att = el->attributes; att != NULL; att = att->next)
00460     {
00461     fprintf(f, "  %s %s %s %s\n",
00462         att->name, att->type, (att->usual ? att->usual : "n/a"),  
00463         (att->required ? "required" : "optional"));
00464     }
00465 }

void dtdParse ( char *  fileName,
char *  prefix,
char *  textField,
struct dtdElement **  retList,
struct hash **  retHash 
)

Definition at line 371 of file dtdParse.c.

References dtdxTag(), dyStringFree, dyStringNew, eatComment(), errAbort(), fixupChildRefs(), freeHashAndVals(), initialEntityHash(), lineFileClose(), lineFileOpen(), needNextWord(), newHash(), parseAttribute(), parseElement(), parseEntity(), sameWord, slAddHead, slReverse(), startsWith(), syntaxError(), trimSpaces(), and TRUE.

00383 {
00384 struct hash *elHash = newHash(8);
00385 struct hash *entityHash = initialEntityHash();
00386 struct hash *predefEntityHash = initialEntityHash();
00387 struct dtdElement *elList = NULL, *el;
00388 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00389 char *line, *word;
00390 struct dyString *buf = dyStringNew(0);
00391 
00392 if (prefix == NULL)
00393     prefix = "";
00394 if (textField == NULL)
00395     textField = "text";
00396 while ((line = dtdxTag(lf, entityHash, buf)) != NULL)
00397     {
00398     line = trimSpaces(line);
00399     if (line == NULL || line[0] == 0 || line[0] == '#')
00400         continue;
00401     if (startsWith("<!--", line))
00402         {
00403         line = eatComment(lf, line);
00404         if (line == NULL)
00405             continue;
00406         }
00407     if (!startsWith("<!", line))
00408         syntaxError(lf);
00409     line += 2;
00410     word = needNextWord(&line, lf);
00411     if (sameWord("ELEMENT", word))
00412         {
00413         el = parseElement(prefix, textField, line, elHash, lf);
00414         slAddHead(&elList, el);
00415         }
00416     else if (sameWord("ATTLIST", word))
00417         {
00418         parseAttribute(line, textField, elHash, lf);
00419         }
00420     else if (sameWord("ENTITY", word))
00421         {
00422         parseEntity(entityHash, predefEntityHash, line, lf);
00423         }
00424     else
00425         {
00426         errAbort("Don't understand %s line %d of %s", word, lf->lineIx, lf->fileName);
00427         }
00428     }
00429 lineFileClose(&lf);
00430 dyStringFree(&buf);
00431 slReverse(&elList);
00432 fixupChildRefs(elList, elHash, fileName);
00433 freeHashAndVals(&entityHash);
00434 freeHashAndVals(&predefEntityHash);
00435 *retHash = elHash;
00436 *retList = elList;
00437 }

Here is the call graph for this function:

static char* dtdxTag ( struct lineFile *  lf,
struct hash *  entityHash,
struct dyString *  buf 
) [static]

Definition at line 345 of file dtdParse.c.

References dyStringAppendC(), dyStringClear, errAbort(), expandEntities(), lineFile::fileName, lineFileNext(), lineFileNextReal(), lineFile::lineIx, dyString::string, dyString::stringSize, and trimSpaces().

Referenced by dtdParse().

00348 {
00349 char *line;
00350 
00351 /* Skip until get a line that starts with '<' */
00352 if (!lineFileNextReal(lf,  &line))
00353     return NULL;
00354 line = trimSpaces(line);
00355 if (line[0] != '<')
00356     errAbort("Text outside of a tag line %d of %s", lf->lineIx, lf->fileName);
00357 dyStringClear(buf);
00358 for (;;)
00359     {
00360     expandEntities(line, entityHash, lf, buf);
00361     if (buf->string[buf->stringSize-1] == '>')
00362          break;
00363     dyStringAppendC(buf, ' ');
00364     if (!lineFileNext(lf, &line, NULL))
00365         errAbort("End of file %s inside of a tag.", lf->fileName);
00366     line = trimSpaces(line);
00367     }
00368 return buf->string;
00369 }

Here is the call graph for this function:

Here is the caller graph for this function:

static char* eatComment ( struct lineFile *  lf,
char *  line 
) [static]

Definition at line 298 of file dtdParse.c.

References lineFileNext(), skipLeadingSpaces(), and stringIn.

Referenced by dtdParse().

00300 {
00301 char *s;
00302 for (;;)
00303     {
00304     if ((s = stringIn("-->", line)) != NULL)
00305         {
00306         line = skipLeadingSpaces(s+3);
00307         if (line[0] == 0)
00308             line = NULL;
00309         return line;
00310         }
00311     if (!lineFileNext(lf, &line, NULL))
00312         return NULL;
00313     }
00314 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void expandEntities ( char *  s,
struct hash *  entityHash,
struct lineFile *  lf,
struct dyString *  dest 
) [static]

Definition at line 316 of file dtdParse.c.

References dyStringAppend(), dyStringAppendC(), errAbort(), lineFile::fileName, hashFindVal(), lineFile::lineIx, and name.

Referenced by dtdxTag().

00320 {
00321 char c;
00322 while ((c = *s++) != 0)
00323     {
00324     if (c == '%' && !isspace(s[0]))
00325         {
00326         char *name = s;
00327         char *end = strchr(s, ';');
00328         char *value;
00329         if (end == NULL)
00330             errAbort("Can't find ; after %% to close entity line %d of %s",
00331                 lf->lineIx, lf->fileName);
00332         *end++ = 0;
00333         s = end;
00334         value = hashFindVal(entityHash, name);
00335         if (value == NULL)
00336             errAbort("Entity %%%s; is not defined line %d of %s",
00337                 name, lf->lineIx, lf->fileName);
00338         dyStringAppend(dest, value);
00339         }
00340     else
00341         dyStringAppendC(dest, c);
00342     }
00343 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void fixupChildRefs ( struct dtdElement *  elList,
struct hash *  elHash,
char *  fileName 
) [static]

Definition at line 281 of file dtdParse.c.

References dtdElement::children, dtdElChild::el, errAbort(), hashFindVal(), dtdElement::lineIx, dtdElement::name, dtdElChild::name, dtdElChild::next, and dtdElement::next.

Referenced by dtdParse().

00284 {
00285 struct dtdElement *el, *child;
00286 struct dtdElChild *ec;
00287 for (el = elList; el != NULL; el = el->next)
00288     {
00289     for (ec = el->children; ec != NULL; ec = ec->next)
00290         {
00291         if ((child = hashFindVal(elHash, ec->name)) == NULL)
00292             errAbort("%s's child %s undefined line %d of %s", el->name, ec->name, el->lineIx, fileName);
00293         ec->el = child;
00294         }
00295     }
00296 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct hash* initialEntityHash (  )  [static, read]

Definition at line 103 of file dtdParse.c.

References cloneString(), hashAdd(), and hashNew.

Referenced by dtdParse().

00106 {
00107 struct hash *hash = hashNew(0);
00108 hashAdd(hash, "INTEGER", cloneString("#INT"));
00109 hashAdd(hash, "REAL", cloneString("#FLOAT"));
00110 hashAdd(hash, "INT", cloneString("INT"));
00111 hashAdd(hash, "FLOAT", cloneString("FLOAT"));
00112 return hash;
00113 }

Here is the call graph for this function:

Here is the caller graph for this function:

static boolean isAllLower ( char *  s  )  [static]

Definition at line 49 of file dtdParse.c.

References FALSE, and TRUE.

Referenced by mixedCaseName().

00052 {
00053 char c;
00054 while ((c = *s++) != 0)
00055     {
00056     if (isalpha(c) && !islower(c))
00057         return FALSE;
00058     }
00059 return TRUE;
00060 }

Here is the caller graph for this function:

static boolean isAllUpper ( char *  s  )  [static]

Definition at line 36 of file dtdParse.c.

References FALSE, and TRUE.

Referenced by mixedCaseName().

00039 {
00040 char c;
00041 while ((c = *s++) != 0)
00042     {
00043     if (isalpha(c) && !isupper(c))
00044         return FALSE;
00045     }
00046 return TRUE;
00047 }

Here is the caller graph for this function:

static char* mixedCaseName ( char *  prefix,
char *  orig 
) [static]

Definition at line 63 of file dtdParse.c.

References FALSE, isAllLower(), isAllUpper(), needMem(), and TRUE.

Referenced by parseAttribute(), and parseElement().

00066 {
00067 char *mixed;
00068 char *d, *s = orig;
00069 char c;
00070 int prefixLen = strlen(prefix), len;
00071 boolean nextUpper;
00072 boolean allUpper = isAllUpper(orig); 
00073 boolean allLower = isAllLower(orig);
00074 boolean initiallyMixed = (!allUpper && !allLower);
00075 
00076 /* Allocate string big enough for prefix and all. */
00077 len = strlen(orig) + prefixLen;
00078 mixed = d = needMem(len+1);
00079 strcpy(d, prefix);
00080 d += prefixLen;
00081 nextUpper = (prefixLen > 0);
00082 
00083 for (;;)
00084    {
00085    c = *s++;
00086    if (c == '_' || c == '-' || c == ':')
00087        nextUpper = TRUE;
00088    else
00089        {
00090        if (nextUpper)
00091            c = toupper(c);
00092        else if (!initiallyMixed)
00093            c = tolower(c);
00094        nextUpper = FALSE;
00095        *d++ = c;
00096        if (c == 0)
00097            break;
00098        }
00099    }
00100 return mixed;
00101 }

Here is the call graph for this function:

Here is the caller graph for this function:

static char* needNextWord ( char **  pLine,
struct lineFile *  lf 
) [static]

Definition at line 19 of file dtdParse.c.

References errAbort(), lineFile::fileName, lineFile::lineIx, and nextWord().

Referenced by dtdParse(), parseAttribute(), parseElement(), and parseEntity().

00021 {
00022 char *word = nextWord(pLine);
00023 if (word == NULL)
00024     errAbort("Missing data line %d of %s", lf->lineIx, lf->fileName);
00025 return word;
00026 }

Here is the call graph for this function:

Here is the caller graph for this function:

void needQuotedString ( char *  in,
char *  out,
struct lineFile *  lf,
char **  retNext 
)

Definition at line 28 of file dtdParse.c.

References errAbort(), lineFile::fileName, lineFile::lineIx, and parseQuotedString().

Referenced by parseAttribute(), and parseEntity().

00031 {
00032 if (!parseQuotedString(in, out, retNext))
00033     errAbort("Missing closing quote line %d of %s", lf->lineIx, lf->fileName);
00034 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void parseAttribute ( char *  line,
char *  textField,
struct hash *  elHash,
struct lineFile *  lf 
) [static]

Definition at line 196 of file dtdParse.c.

References AllocVar, dtdElement::attributes, cloneString(), errAbort(), lineFile::fileName, hashFindVal(), lineFile::lineIx, mixedCaseName(), dtdAttribute::mixedCaseName, dtdAttribute::name, needNextWord(), needQuotedString(), dtdAttribute::required, sameString, sameWord, skipLeadingSpaces(), slAddTail(), TRUE, dtdAttribute::type, and dtdAttribute::usual.

Referenced by dtdParse().

00199 {
00200 char *word;
00201 struct dtdAttribute *att;
00202 struct dtdElement *el;
00203 char *e;
00204 
00205 /* Get rid of trailing '>' */
00206 e = strrchr(line, '>');
00207 if (e == NULL)
00208     errAbort("Missing '>' line %d of %s", lf->lineIx, lf->fileName);
00209 *e = 0;
00210 
00211 word = needNextWord(&line, lf);
00212 if ((el = hashFindVal(elHash, word)) == NULL)
00213     errAbort("Undefined %s line %d of %s", word, lf->lineIx, lf->fileName);
00214 word = needNextWord(&line, lf);
00215 if (sameString(word, textField))
00216     errAbort("Name conflict with text field name line %d of %s", lf->lineIx, lf->fileName);
00217 AllocVar(att);
00218 att->name = cloneString(word);
00219 att->mixedCaseName = mixedCaseName("", att->name);
00220 word = needNextWord(&line, lf);
00221 att->type = cloneString(word);
00222 line = skipLeadingSpaces(line);
00223 if (line[0] == '#')
00224     {
00225     word = needNextWord(&line, lf);
00226     if (sameWord("#REQUIRED", word))
00227         att->required = TRUE;
00228     else if (sameWord("#IMPLIED", word))
00229         att->usual = NULL;
00230     else
00231         errAbort("Unknown directive %s line %d of %s", word, lf->lineIx, lf->fileName);
00232     }
00233 else if (line[0] == '\'' || line[0] == '"')
00234     {
00235     word = line;
00236     needQuotedString(word, word, lf, &line);
00237     att->usual = cloneString(word);
00238     }
00239 else
00240     {
00241     word = needNextWord(&line, lf);
00242     att->usual = cloneString(word);
00243     }
00244 slAddTail(&el->attributes, att);
00245 }

Here is the call graph for this function:

Here is the caller graph for this function:

static struct dtdElement* parseElement ( char *  prefix,
char *  textField,
char *  line,
struct hash *  elHash,
struct lineFile *  lf 
) [static, read]

Definition at line 115 of file dtdParse.c.

References AllocVar, ArraySize, dtdElement::children, chopString(), cloneString(), dtdElChild::copyCode, errAbort(), lineFile::fileName, hashAddSaveName(), hashFindVal(), dtdElChild::isOr, lineFile::lineIx, dtdElement::lineIx, mixedCaseName(), dtdElement::mixedCaseName, dtdElChild::name, name, dtdElement::name, needNextWord(), sameString, slAddHead, slReverse(), and dtdElement::textType.

Referenced by dtdParse().

00119 {
00120 char *word, *s, *e;
00121 char *words[256];
00122 int wordCount, i;
00123 struct dtdElChild *ec;
00124 struct dtdElement *el;
00125 boolean isOr;
00126 char orCopyCode = '?';
00127 
00128 word = needNextWord(&line, lf);
00129 s = word + strlen(word)-1;
00130 if (s[0] == '>')
00131    *s = 0;
00132 if ((el = hashFindVal(elHash, word)) != NULL)
00133     errAbort("Duplicate element %s line %d and %d of %s", word, el->lineIx, lf->lineIx, lf->fileName);
00134 AllocVar(el);
00135 el->lineIx = lf->lineIx;
00136 hashAddSaveName(elHash, word, el, &el->name);
00137 el->mixedCaseName = mixedCaseName(prefix, el->name);
00138 if (line != NULL && (s = strchr(line, '(')) != NULL)
00139     {
00140     s += 1;
00141     if ((e = strchr(line, ')')) == NULL)
00142         errAbort("Missing ')' line %d of %s", lf->lineIx, lf->fileName);
00143     *e = 0;
00144     isOr = (strchr(s, '|') != NULL);
00145     if (isOr)
00146       {
00147         orCopyCode = *(e+1);
00148         if ((orCopyCode != '+') && (orCopyCode != '*'))
00149           orCopyCode = '?';
00150       }
00151     wordCount = chopString(s, "| ,\t", words, ArraySize(words));
00152     if (wordCount == ArraySize(words))
00153         errAbort("Too many children in list line %d of %s", lf->lineIx, lf->fileName);
00154     for (i=0; i<wordCount; ++i)
00155         {
00156         char *name = words[i];
00157         int len = strlen(name);
00158         char lastC = name[len-1];
00159         if (name[0] == '#')
00160             {
00161             if (isOr)
00162                 errAbort("# character in enumeration not allowed line %d of %s",
00163                    lf->lineIx, lf->fileName);
00164             if (el->textType != NULL)
00165                 errAbort("Multiple types for text between tags line %d of %s", 
00166                         lf->lineIx, lf->fileName);
00167             el->textType = cloneString(name);
00168             }
00169         else
00170             {
00171             AllocVar(ec);
00172             slAddHead(&el->children, ec);
00173             ec->isOr = isOr;
00174             if (isOr)
00175                ec->copyCode = orCopyCode;
00176             else
00177                 {
00178                 if (lastC == '+' || lastC == '?' || lastC == '*')
00179                     {
00180                     ec->copyCode = lastC;
00181                     name[len-1] = 0;
00182                     }
00183                 else
00184                     ec->copyCode = '1';
00185                 }
00186             if (sameString(name, textField))
00187                 errAbort("Name conflict with default text field name line %d of %s", lf->lineIx, lf->fileName);
00188             ec->name = cloneString(name);
00189             }
00190         }
00191     slReverse(&el->children);
00192     }
00193 return el;
00194 }

Here is the call graph for this function:

Here is the caller graph for this function:

void parseEntity ( struct hash *  entityHash,
struct hash *  predefEntityHash,
char *  line,
struct lineFile *  lf 
)

Definition at line 248 of file dtdParse.c.

References cloneString(), errAbort(), lineFile::fileName, hashAdd(), hashFindVal(), hashLookup(), lineFile::lineIx, name, needNextWord(), needQuotedString(), sameString, and skipLeadingSpaces().

Referenced by dtdParse().

00251 {
00252 char *percent = needNextWord(&line, lf);
00253 char *name = needNextWord(&line, lf);
00254 char *value = skipLeadingSpaces(line);
00255 if (value[0] != '"')
00256     errAbort("Expecting quoted string at end of ENTITY tag line %d of %s",
00257         lf->lineIx, lf->fileName);
00258 needQuotedString(value, value, lf, &line);
00259 if (!sameString(percent, "%"))
00260     errAbort("Expecting %% after ENTITY tag line %d of %s", lf->lineIx, lf->fileName);
00261 if (hashLookup(predefEntityHash, name) == NULL)
00262 /* We don't want to overwrite the predefined entities.  These are all
00263  * defined to be #PCDATA or CDATA for the benefit of non-UCSC XML tools.
00264  * Internally we map them to #INT/#FLOAT etc. so we can have numbers
00265  * as well as strings in our C structures and relational database tables. */
00266     {
00267     char *oldVal = hashFindVal(entityHash, name);
00268     if (oldVal != NULL)
00269         {
00270         if (!sameString(oldVal, value))
00271             errAbort("Entity %s redefined line %d of %s", name, lf->lineIx, lf->fileName);
00272         }
00273     else
00274         {
00275         hashAdd(entityHash, name, cloneString(value));
00276         }
00277     }
00278 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void syntaxError ( struct lineFile *  lf  )  [static]

Definition at line 13 of file dtdParse.c.

References errAbort(), lineFile::fileName, and lineFile::lineIx.

Referenced by dtdParse().

00015 {
00016 errAbort("Syntax error line %d of %s", lf->lineIx, lf->fileName);
00017 }

Here is the call graph for this function:

Here is the caller graph for this function:


Generated on Tue Dec 25 19:47:07 2007 for blat by  doxygen 1.5.2