00001
00002
00003
00004
00005 #include "common.h"
00006 #include "hash.h"
00007 #include "linefile.h"
00008 #include "dystring.h"
00009 #include "obscure.h"
00010 #include "dtdParse.h"
00011
00012
00013 static void syntaxError(struct lineFile *lf)
00014
00015 {
00016 errAbort("Syntax error line %d of %s", lf->lineIx, lf->fileName);
00017 }
00018
00019 static char *needNextWord(char **pLine, struct lineFile *lf)
00020
00021 {
00022 char *word = nextWord(pLine);
00023 if (word == NULL)
00024 errAbort("Missing data line %d of %s", lf->lineIx, lf->fileName);
00025 return word;
00026 }
00027
00028 void needQuotedString( char *in, char *out, struct lineFile *lf, char **retNext)
00029
00030
00031 {
00032 if (!parseQuotedString(in, out, retNext))
00033 errAbort("Missing closing quote line %d of %s", lf->lineIx, lf->fileName);
00034 }
00035
00036 static boolean isAllUpper(char *s)
00037
00038
00039 {
00040 char c;
00041 while ((c = *s++) != 0)
00042 {
00043 if (isalpha(c) && !isupper(c))
00044 return FALSE;
00045 }
00046 return TRUE;
00047 }
00048
00049 static boolean isAllLower(char *s)
00050
00051
00052 {
00053 char c;
00054 while ((c = *s++) != 0)
00055 {
00056 if (isalpha(c) && !islower(c))
00057 return FALSE;
00058 }
00059 return TRUE;
00060 }
00061
00062
00063 static char *mixedCaseName(char *prefix, char *orig)
00064
00065
00066 {
00067 char *mixed;
00068 char *d, *s = orig;
00069 char c;
00070 int prefixLen = strlen(prefix), len;
00071 boolean nextUpper;
00072 boolean allUpper = isAllUpper(orig);
00073 boolean allLower = isAllLower(orig);
00074 boolean initiallyMixed = (!allUpper && !allLower);
00075
00076
00077 len = strlen(orig) + prefixLen;
00078 mixed = d = needMem(len+1);
00079 strcpy(d, prefix);
00080 d += prefixLen;
00081 nextUpper = (prefixLen > 0);
00082
00083 for (;;)
00084 {
00085 c = *s++;
00086 if (c == '_' || c == '-' || c == ':')
00087 nextUpper = TRUE;
00088 else
00089 {
00090 if (nextUpper)
00091 c = toupper(c);
00092 else if (!initiallyMixed)
00093 c = tolower(c);
00094 nextUpper = FALSE;
00095 *d++ = c;
00096 if (c == 0)
00097 break;
00098 }
00099 }
00100 return mixed;
00101 }
00102
static struct hash *initialEntityHash()
/* Create a new hash holding the built-in entities.  Each value is a
 * separately cloned string:
 *     INTEGER -> "#INT",  REAL  -> "#FLOAT",
 *     INT     -> "INT",   FLOAT -> "FLOAT". */
{
static char *builtIns[][2] =
    {
    {"INTEGER", "#INT"},
    {"REAL", "#FLOAT"},
    {"INT", "INT"},
    {"FLOAT", "FLOAT"},
    };
struct hash *hash = hashNew(0);
int i, count = sizeof(builtIns) / sizeof(builtIns[0]);
for (i = 0; i < count; ++i)
    hashAdd(hash, builtIns[i][0], cloneString(builtIns[i][1]));
return hash;
}
00114
00115 static struct dtdElement *parseElement(
00116 char *prefix, char *textField, char *line,
00117 struct hash *elHash, struct lineFile *lf)
00118
00119 {
00120 char *word, *s, *e;
00121 char *words[256];
00122 int wordCount, i;
00123 struct dtdElChild *ec;
00124 struct dtdElement *el;
00125 boolean isOr;
00126 char orCopyCode = '?';
00127
00128 word = needNextWord(&line, lf);
00129 s = word + strlen(word)-1;
00130 if (s[0] == '>')
00131 *s = 0;
00132 if ((el = hashFindVal(elHash, word)) != NULL)
00133 errAbort("Duplicate element %s line %d and %d of %s", word, el->lineIx, lf->lineIx, lf->fileName);
00134 AllocVar(el);
00135 el->lineIx = lf->lineIx;
00136 hashAddSaveName(elHash, word, el, &el->name);
00137 el->mixedCaseName = mixedCaseName(prefix, el->name);
00138 if (line != NULL && (s = strchr(line, '(')) != NULL)
00139 {
00140 s += 1;
00141 if ((e = strchr(line, ')')) == NULL)
00142 errAbort("Missing ')' line %d of %s", lf->lineIx, lf->fileName);
00143 *e = 0;
00144 isOr = (strchr(s, '|') != NULL);
00145 if (isOr)
00146 {
00147 orCopyCode = *(e+1);
00148 if ((orCopyCode != '+') && (orCopyCode != '*'))
00149 orCopyCode = '?';
00150 }
00151 wordCount = chopString(s, "| ,\t", words, ArraySize(words));
00152 if (wordCount == ArraySize(words))
00153 errAbort("Too many children in list line %d of %s", lf->lineIx, lf->fileName);
00154 for (i=0; i<wordCount; ++i)
00155 {
00156 char *name = words[i];
00157 int len = strlen(name);
00158 char lastC = name[len-1];
00159 if (name[0] == '#')
00160 {
00161 if (isOr)
00162 errAbort("# character in enumeration not allowed line %d of %s",
00163 lf->lineIx, lf->fileName);
00164 if (el->textType != NULL)
00165 errAbort("Multiple types for text between tags line %d of %s",
00166 lf->lineIx, lf->fileName);
00167 el->textType = cloneString(name);
00168 }
00169 else
00170 {
00171 AllocVar(ec);
00172 slAddHead(&el->children, ec);
00173 ec->isOr = isOr;
00174 if (isOr)
00175 ec->copyCode = orCopyCode;
00176 else
00177 {
00178 if (lastC == '+' || lastC == '?' || lastC == '*')
00179 {
00180 ec->copyCode = lastC;
00181 name[len-1] = 0;
00182 }
00183 else
00184 ec->copyCode = '1';
00185 }
00186 if (sameString(name, textField))
00187 errAbort("Name conflict with default text field name line %d of %s", lf->lineIx, lf->fileName);
00188 ec->name = cloneString(name);
00189 }
00190 }
00191 slReverse(&el->children);
00192 }
00193 return el;
00194 }
00195
00196 static void parseAttribute(char *line, char *textField,
00197 struct hash *elHash, struct lineFile *lf)
00198
00199 {
00200 char *word;
00201 struct dtdAttribute *att;
00202 struct dtdElement *el;
00203 char *e;
00204
00205
00206 e = strrchr(line, '>');
00207 if (e == NULL)
00208 errAbort("Missing '>' line %d of %s", lf->lineIx, lf->fileName);
00209 *e = 0;
00210
00211 word = needNextWord(&line, lf);
00212 if ((el = hashFindVal(elHash, word)) == NULL)
00213 errAbort("Undefined %s line %d of %s", word, lf->lineIx, lf->fileName);
00214 word = needNextWord(&line, lf);
00215 if (sameString(word, textField))
00216 errAbort("Name conflict with text field name line %d of %s", lf->lineIx, lf->fileName);
00217 AllocVar(att);
00218 att->name = cloneString(word);
00219 att->mixedCaseName = mixedCaseName("", att->name);
00220 word = needNextWord(&line, lf);
00221 att->type = cloneString(word);
00222 line = skipLeadingSpaces(line);
00223 if (line[0] == '#')
00224 {
00225 word = needNextWord(&line, lf);
00226 if (sameWord("#REQUIRED", word))
00227 att->required = TRUE;
00228 else if (sameWord("#IMPLIED", word))
00229 att->usual = NULL;
00230 else
00231 errAbort("Unknown directive %s line %d of %s", word, lf->lineIx, lf->fileName);
00232 }
00233 else if (line[0] == '\'' || line[0] == '"')
00234 {
00235 word = line;
00236 needQuotedString(word, word, lf, &line);
00237 att->usual = cloneString(word);
00238 }
00239 else
00240 {
00241 word = needNextWord(&line, lf);
00242 att->usual = cloneString(word);
00243 }
00244 slAddTail(&el->attributes, att);
00245 }
00246
00247
00248 void parseEntity(struct hash *entityHash, struct hash *predefEntityHash,
00249 char *line, struct lineFile *lf)
00250
00251 {
00252 char *percent = needNextWord(&line, lf);
00253 char *name = needNextWord(&line, lf);
00254 char *value = skipLeadingSpaces(line);
00255 if (value[0] != '"')
00256 errAbort("Expecting quoted string at end of ENTITY tag line %d of %s",
00257 lf->lineIx, lf->fileName);
00258 needQuotedString(value, value, lf, &line);
00259 if (!sameString(percent, "%"))
00260 errAbort("Expecting %% after ENTITY tag line %d of %s", lf->lineIx, lf->fileName);
00261 if (hashLookup(predefEntityHash, name) == NULL)
00262
00263
00264
00265
00266 {
00267 char *oldVal = hashFindVal(entityHash, name);
00268 if (oldVal != NULL)
00269 {
00270 if (!sameString(oldVal, value))
00271 errAbort("Entity %s redefined line %d of %s", name, lf->lineIx, lf->fileName);
00272 }
00273 else
00274 {
00275 hashAdd(entityHash, name, cloneString(value));
00276 }
00277 }
00278 }
00279
00280
00281 static void fixupChildRefs(struct dtdElement *elList, struct hash *elHash, char *fileName)
00282
00283
00284 {
00285 struct dtdElement *el, *child;
00286 struct dtdElChild *ec;
00287 for (el = elList; el != NULL; el = el->next)
00288 {
00289 for (ec = el->children; ec != NULL; ec = ec->next)
00290 {
00291 if ((child = hashFindVal(elHash, ec->name)) == NULL)
00292 errAbort("%s's child %s undefined line %d of %s", el->name, ec->name, el->lineIx, fileName);
00293 ec->el = child;
00294 }
00295 }
00296 }
00297
00298 static char *eatComment(struct lineFile *lf, char *line)
00299
00300 {
00301 char *s;
00302 for (;;)
00303 {
00304 if ((s = stringIn("-->", line)) != NULL)
00305 {
00306 line = skipLeadingSpaces(s+3);
00307 if (line[0] == 0)
00308 line = NULL;
00309 return line;
00310 }
00311 if (!lineFileNext(lf, &line, NULL))
00312 return NULL;
00313 }
00314 }
00315
00316 static void expandEntities(char *s, struct hash *entityHash, struct lineFile *lf,
00317 struct dyString *dest)
00318
00319
00320 {
00321 char c;
00322 while ((c = *s++) != 0)
00323 {
00324 if (c == '%' && !isspace(s[0]))
00325 {
00326 char *name = s;
00327 char *end = strchr(s, ';');
00328 char *value;
00329 if (end == NULL)
00330 errAbort("Can't find ; after %% to close entity line %d of %s",
00331 lf->lineIx, lf->fileName);
00332 *end++ = 0;
00333 s = end;
00334 value = hashFindVal(entityHash, name);
00335 if (value == NULL)
00336 errAbort("Entity %%%s; is not defined line %d of %s",
00337 name, lf->lineIx, lf->fileName);
00338 dyStringAppend(dest, value);
00339 }
00340 else
00341 dyStringAppendC(dest, c);
00342 }
00343 }
00344
00345 static char *dtdxTag(struct lineFile *lf, struct hash *entityHash,
00346 struct dyString *buf)
00347
00348 {
00349 char *line;
00350
00351
00352 if (!lineFileNextReal(lf, &line))
00353 return NULL;
00354 line = trimSpaces(line);
00355 if (line[0] != '<')
00356 errAbort("Text outside of a tag line %d of %s", lf->lineIx, lf->fileName);
00357 dyStringClear(buf);
00358 for (;;)
00359 {
00360 expandEntities(line, entityHash, lf, buf);
00361 if (buf->string[buf->stringSize-1] == '>')
00362 break;
00363 dyStringAppendC(buf, ' ');
00364 if (!lineFileNext(lf, &line, NULL))
00365 errAbort("End of file %s inside of a tag.", lf->fileName);
00366 line = trimSpaces(line);
00367 }
00368 return buf->string;
00369 }
00370
00371 void dtdParse(char *fileName, char *prefix, char *textField,
00372 struct dtdElement **retList, struct hash **retHash)
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383 {
00384 struct hash *elHash = newHash(8);
00385 struct hash *entityHash = initialEntityHash();
00386 struct hash *predefEntityHash = initialEntityHash();
00387 struct dtdElement *elList = NULL, *el;
00388 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00389 char *line, *word;
00390 struct dyString *buf = dyStringNew(0);
00391
00392 if (prefix == NULL)
00393 prefix = "";
00394 if (textField == NULL)
00395 textField = "text";
00396 while ((line = dtdxTag(lf, entityHash, buf)) != NULL)
00397 {
00398 line = trimSpaces(line);
00399 if (line == NULL || line[0] == 0 || line[0] == '#')
00400 continue;
00401 if (startsWith("<!--", line))
00402 {
00403 line = eatComment(lf, line);
00404 if (line == NULL)
00405 continue;
00406 }
00407 if (!startsWith("<!", line))
00408 syntaxError(lf);
00409 line += 2;
00410 word = needNextWord(&line, lf);
00411 if (sameWord("ELEMENT", word))
00412 {
00413 el = parseElement(prefix, textField, line, elHash, lf);
00414 slAddHead(&elList, el);
00415 }
00416 else if (sameWord("ATTLIST", word))
00417 {
00418 parseAttribute(line, textField, elHash, lf);
00419 }
00420 else if (sameWord("ENTITY", word))
00421 {
00422 parseEntity(entityHash, predefEntityHash, line, lf);
00423 }
00424 else
00425 {
00426 errAbort("Don't understand %s line %d of %s", word, lf->lineIx, lf->fileName);
00427 }
00428 }
00429 lineFileClose(&lf);
00430 dyStringFree(&buf);
00431 slReverse(&elList);
00432 fixupChildRefs(elList, elHash, fileName);
00433 freeHashAndVals(&entityHash);
00434 freeHashAndVals(&predefEntityHash);
00435 *retHash = elHash;
00436 *retList = elList;
00437 }
00438
00439 void dtdElementDump(struct dtdElement *el, FILE *f)
00440
00441 {
00442 struct dtdElChild *ec;
00443 struct dtdAttribute *att;
00444 fprintf(f, "%s %s (", el->name, el->mixedCaseName);
00445 for (ec = el->children; ec != NULL; ec = ec->next)
00446 {
00447 fprintf(f, "%s", ec->name);
00448 if (ec->copyCode != '1')
00449 fprintf(f, "%c", ec->copyCode);
00450 if (ec->isOr)
00451 fprintf(f, " (isOr)");
00452 if (ec->next != NULL)
00453 fprintf(f, ", ");
00454 }
00455 fprintf(f, ")");
00456 if (el->textType != NULL)
00457 fprintf(f, " (%s)", el->textType);
00458 fprintf(f, "\n");
00459 for (att = el->attributes; att != NULL; att = att->next)
00460 {
00461 fprintf(f, " %s %s %s %s\n",
00462 att->name, att->type, (att->usual ? att->usual : "n/a"),
00463 (att->required ? "required" : "optional"));
00464 }
00465 }
00466