00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "common.h"
00010 #include "dystring.h"
00011 #include "errabort.h"
00012 #include "hash.h"
00013 #include "xp.h"
00014 #include "xmlEscape.h"
00015
/* Revision-control keyword string recording which revision of xp.c this is. */
static const char rcsid[] = "$Id: xp.c,v 1.15 2005/12/19 17:51:14 kent Exp $";
00017
00018
00019 char xpNextBuf(struct xp *xp)
00020
00021 {
00022 int size = xp->read(xp->userData, xp->inBuf, sizeof(xp->inBuf));
00023 if (size <= 0)
00024 return 0;
00025 xp->inBufEnd = xp->inBuf + size;
00026 xp->in = xp->inBuf+1;
00027 return xp->inBuf[0];
00028 }
00029
/* Fetch the next input character: straight from the buffer while any is
 * left, otherwise via xpNextBuf(), which refills the buffer and yields 0
 * when there is no more input.
 * The argument is parenthesized so any pointer-valued expression can be
 * passed.  It is still evaluated more than once, so it must not have side
 * effects. */
#define xpGetChar(xp) \
    ((xp)->in < (xp)->inBufEnd ? *(xp)->in++ : xpNextBuf(xp))
00032
00033
/* Step the read cursor back one character so that the character most
 * recently taken from the buffer is delivered again by the next
 * xpGetChar().  The argument is parenthesized so any pointer-valued
 * expression (e.g. &parser) can be passed. */
#define xpUngetChar(xp) \
    (--(xp)->in)
00036
00037
00038 struct xp *xpNew(void *userData,
00039 void (*atStartTag)(void *userData, char *name, char **atts),
00040 void (*atEndTag)(void *userData, char *name, char *text),
00041 int (*read)(void *userData, char *buf, int bufSize),
00042 char *fileName)
00043
00044
00045 {
00046 struct xp *xp;
00047 AllocVar(xp);
00048 xp->stack = xp->stackBufEnd = xp->stackBuf + ArraySize(xp->stackBuf);
00049 xp->userData = userData;
00050 xp->atStartTag = atStartTag;
00051 xp->atEndTag = atEndTag;
00052 xp->read = read;
00053 xp->lineIx = 1;
00054 xp->endTag = newDyString(64);
00055 if (fileName)
00056 xp->fileName = cloneString(fileName);
00057 else
00058 xp->fileName = cloneString("XML");
00059 xp->inBufEnd = xp->in = xp->inBuf;
00060 xp->symHash = xmlEscapeSymHash();
00061 return xp;
00062 }
00063
int xpReadFromFile(void *userData, char *buf, int bufSize)
/* Read callback for xpNew() when the input is a stdio stream.
 *   userData - the FILE * to read from
 *   buf      - destination buffer
 *   bufSize  - capacity of buf in bytes
 * Returns the number of bytes stored in buf.  Returns 0 at end of file or
 * on a read error, and also when there is nothing valid to read into
 * (NULL stream, NULL buffer or a non-positive bufSize). */
{
FILE *f = userData;
/* A negative bufSize would turn into a huge size_t inside fread() and
 * overrun buf, so reject it up front. */
if (f == NULL || buf == NULL || bufSize <= 0)
    return 0;
/* fread() returns at most bufSize here, so the result fits in an int. */
return (int)fread(buf, 1, (size_t)bufSize, f);
}
00070
00071
00072
00073 void xpFree(struct xp **pXp)
00074
00075 {
00076 int i;
00077 struct xp *xp = *pXp;
00078 if (xp != NULL)
00079 {
00080 struct xpStack *stack;
00081 for (stack = xp->stackBufEnd; --stack >= xp->stackBuf; )
00082 {
00083 if (stack->tag == NULL)
00084 break;
00085 freeDyString(&stack->tag);
00086 freeDyString(&stack->text);
00087 }
00088 for (i=0; i<ArraySize(xp->attDyBuf); ++i)
00089 {
00090 if (xp->attDyBuf[i] == NULL)
00091 break;
00092 freeDyString(&xp->attDyBuf[i]);
00093 }
00094 freeDyString(&xp->endTag);
00095 freeMem(xp->fileName);
00096 hashFree(&xp->symHash);
00097 freez(pXp);
00098 }
00099 }
00100
00101 int xpLineIx(struct xp *xp)
00102
00103 {
00104 return xp->lineIx;
00105 }
00106
00107 char *xpFileName(struct xp *xp)
00108
00109 {
00110 return xp->fileName;
00111 }
00112
void xpError(struct xp *xp, char *format, ...)
/* Report a parse error: emit the printf-style message through vaWarn(),
 * then call errAbort() with the current line number and file name.
 *   format, ... - printf-style description of the problem
 * errAbort() is presumably non-returning, so callers in this file treat
 * xpError() as the end of the line. */
{
va_list args;
va_start(args, format);
vaWarn(format, args);
/* Finish with the argument list before errAbort(): if that call never
 * comes back, a va_end() placed after it would be skipped. */
va_end(args);
errAbort("line %d of %s", xpLineIx(xp), xpFileName(xp));
}
00122
static void xpUnexpectedEof(struct xp *xp)
/* Report, via xpError(), that the input ended in the middle of a
 * construct. */
{
xpError(xp,
    "Unexpected end of file.");
}
00128
00129 static void xpEatComment(struct xp *xp, char commentC)
00130
00131 {
00132 int startLine = xp->lineIx;
00133 char lastC = 0;
00134 char c;
00135 for (;;)
00136 {
00137 if ((c = xpGetChar(xp)) == 0)
00138 xpError(xp, "End of file in comment that started line %d", startLine);
00139 if (c == '\n')
00140 ++xp->lineIx;
00141 if (c == '>')
00142 {
00143 if (lastC == commentC || commentC == '!')
00144 break;
00145 }
00146 lastC = c;
00147 }
00148 }
00149
00150 static void xpLookup(struct xp *xp, struct dyString *temp, struct dyString *text)
00151
00152 {
00153 char c;
00154 char *s;
00155 dyStringClear(temp);
00156 for (;;)
00157 {
00158 if ((c = xpGetChar(xp)) == 0)
00159 xpError(xp, "End of file in after & and before ;");
00160 if (isspace(c))
00161 xpError(xp, "& without ;");
00162 if (c == ';')
00163 break;
00164 dyStringAppendC(temp, c);
00165 }
00166 s = temp->string;
00167 if (s[0] == '#')
00168 {
00169 c = atoi(s+1);
00170 dyStringAppendC(text, c);
00171 }
00172 else if ((s = hashFindVal(xp->symHash, s)) == NULL)
00173 {
00174 dyStringAppendC(text, '&');
00175 dyStringAppend(text, temp->string);
00176 dyStringAppendC(text, ';');
00177 }
00178 else
00179 {
00180 dyStringAppend(text, s);
00181 }
00182 }
00183
void xpForceMatch(struct xp *xp, char *matchString)
/* Consume input that has to equal matchString character for character.
 * Each mismatch (including end of input) is reported through xpError()
 * with the message "Expecting <matchString>". */
{
char *expected;
for (expected = matchString; *expected != 0; ++expected)
    {
    if (xpGetChar(xp) != *expected)
        xpError(xp, "Expecting %s", matchString);
    }
}
00194
00195 void xpTextUntil(struct xp *xp, char *endPattern)
00196
00197 {
00198 int endSize = strlen(endPattern);
00199 int endPos = 0;
00200 char c;
00201 struct dyString *dy = xp->stack->text;
00202 for (;;)
00203 {
00204 if ((c = xpGetChar(xp)) == 0)
00205 xpUnexpectedEof(xp);
00206 if (c == endPattern[endPos])
00207 {
00208 endPos += 1;
00209 if (endPos == endSize)
00210 return;
00211 }
00212 else
00213 {
00214 if (endPos > 0)
00215 dyStringAppendN(dy, endPattern, endPos);
00216 dyStringAppendC(dy, c);
00217 endPos = 0;
00218 }
00219 }
00220 }
00221
00222
00223 void xpParseStartTag(struct xp *xp,
00224 int maxAttCount,
00225 struct dyString *retName,
00226 int *retAttCount,
00227 struct dyString **retAttributes,
00228 boolean *retClosed)
00229
00230
00231 {
00232 char c, quotC;
00233 int attCount = 0;
00234 struct dyString *dy;
00235 int lineStart;
00236
00237 dyStringClear(retName);
00238
00239
00240 for (;;)
00241 {
00242 if ((c = xpGetChar(xp)) == 0)
00243 xpUnexpectedEof(xp);
00244 if (isspace(c))
00245 {
00246 if (c == '\n')
00247 ++xp->lineIx;
00248 }
00249 else
00250 break;
00251 }
00252
00253
00254 for (;;)
00255 {
00256 dyStringAppendC(retName, c);
00257 if ((c = xpGetChar(xp)) == 0)
00258 xpUnexpectedEof(xp);
00259 if (c == '>' || c == '/' || isspace(c))
00260 break;
00261 }
00262 if (c == '\n')
00263 ++xp->lineIx;
00264
00265
00266 if (c != '>' && c != '/')
00267 {
00268 for (;;)
00269 {
00270
00271 for (;;)
00272 {
00273 if ((c = xpGetChar(xp)) == 0)
00274 xpUnexpectedEof(xp);
00275 if (isspace(c))
00276 {
00277 if (c == '\n')
00278 ++xp->lineIx;
00279 }
00280 else
00281 break;
00282 }
00283 if (c == '>' || c == '/')
00284 break;
00285
00286
00287 if (attCount >= maxAttCount - 2)
00288 xpError(xp, "Attribute stack overflow");
00289 dy = retAttributes[attCount];
00290 if (dy == NULL)
00291 dy = retAttributes[attCount] = newDyString(64);
00292 else
00293 dyStringClear(dy);
00294 ++attCount;
00295
00296
00297 for (;;)
00298 {
00299 dyStringAppendC(dy, c);
00300 if ((c = xpGetChar(xp)) == 0)
00301 xpUnexpectedEof(xp);
00302 if (isspace(c))
00303 {
00304 if (c == '\n')
00305 ++xp->lineIx;
00306 break;
00307 }
00308 if (c == '=')
00309 break;
00310 if (c == '/' || c == '>')
00311 xpError(xp, "Expecting '=' after attribute name");
00312 }
00313
00314
00315 if (c != '=')
00316 {
00317 for (;;)
00318 {
00319 if ((c = xpGetChar(xp)) == 0)
00320 xpUnexpectedEof(xp);
00321 if (isspace(c))
00322 {
00323 if (c == '\n')
00324 ++xp->lineIx;
00325 }
00326 else
00327 break;
00328 }
00329 if (c != '=')
00330 xpError(xp, "Expecting '=' after attribute name");
00331 }
00332
00333
00334 for (;;)
00335 {
00336 if ((c = xpGetChar(xp)) == 0)
00337 xpUnexpectedEof(xp);
00338 else if (isspace(c))
00339 {
00340 if (c == '\n')
00341 ++xp->lineIx;
00342 }
00343 else
00344 break;
00345 }
00346 if (c != '\'' && c != '"')
00347 xpError(xp, "Expecting quoted string after =");
00348
00349
00350 if (attCount >= maxAttCount - 2)
00351 xpError(xp, "Attribute stack overflow");
00352 dy = retAttributes[attCount];
00353 if (dy == NULL)
00354 dy = retAttributes[attCount] = newDyString(64);
00355 else
00356 dyStringClear(dy);
00357 ++attCount;
00358
00359
00360 quotC = c;
00361 lineStart = xp->lineIx;
00362 for (;;)
00363 {
00364 if ((c = xpGetChar(xp)) == 0)
00365 xpError(xp, "End of file inside literal string that started at line %d", lineStart);
00366 if (c == quotC)
00367 break;
00368 if (c == '&')
00369 xpLookup(xp, xp->endTag, dy);
00370 else
00371 {
00372 if (c == '\n')
00373 ++xp->lineIx;
00374 dyStringAppendC(dy, c);
00375 }
00376 }
00377 }
00378 }
00379 if (c == '/')
00380 {
00381 *retClosed = TRUE;
00382 c = xpGetChar(xp);
00383 if (c != '>')
00384 xpError(xp, "Expecting '>' after '/'");
00385 }
00386 else
00387 *retClosed = FALSE;
00388 *retAttCount = attCount;
00389 }
00390
00391 void xpParseEndTag(struct xp *xp, char *tagName)
00392
00393
00394 {
00395 struct dyString *dy = xp->endTag;
00396 char c;
00397
00398 dyStringClear(dy);
00399
00400
00401 for (;;)
00402 {
00403 if ((c = xpGetChar(xp)) == 0)
00404 xpUnexpectedEof(xp);
00405 if (isspace(c))
00406 {
00407 if (c == '\n')
00408 ++xp->lineIx;
00409 }
00410 else
00411 break;
00412 }
00413
00414
00415 for (;;)
00416 {
00417 dyStringAppendC(dy, c);
00418 if ((c = xpGetChar(xp)) == 0)
00419 xpUnexpectedEof(xp);
00420 if (isspace(c))
00421 {
00422 if (c == '\n')
00423 ++xp->lineIx;
00424 break;
00425 }
00426 if (c == '>')
00427 break;
00428 }
00429
00430
00431 while (c != '>')
00432 {
00433 dyStringAppendC(dy, c);
00434 if ((c = xpGetChar(xp)) == 0)
00435 xpUnexpectedEof(xp);
00436 if (isspace(c))
00437 {
00438 if (c == '\n')
00439 ++xp->lineIx;
00440 }
00441 else if (c != '>')
00442 xpError(xp, "Unexpected characters past first word in /%s tag", dy->string);
00443 }
00444
00445 if (!sameString(dy->string, tagName))
00446 xpError(xp, "Mismatch between start tag %s and end tag %s", tagName, dy->string);
00447 }
00448
00449 boolean xpParseNext(struct xp *xp, char *tag)
00450
00451
00452
00453
00454
00455 {
00456 char c;
00457 int i, attCount = 0;
00458 struct dyString *text = NULL;
00459 boolean isClosed;
00460 boolean inside = (tag == NULL);
00461 struct xpStack *initialStack = xp->stack;
00462
00463 for (;;)
00464 {
00465
00466 for (;;)
00467 {
00468 if ((c = xpGetChar(xp)) == 0)
00469 return FALSE;
00470 if (c == '<')
00471 break;
00472 if (c == '&')
00473 xpLookup(xp, xp->endTag, text);
00474 else
00475 {
00476 if (c == '\n')
00477 ++xp->lineIx;
00478 if (text != NULL)
00479 dyStringAppendC(text, c);
00480 }
00481 }
00482
00483
00484 c = xpGetChar(xp);
00485 if (c == 0)
00486 xpError(xp, "End of file inside tag");
00487 else if (c == '?' || c == '!')
00488 xpEatComment(xp, c);
00489 else if (c == '/')
00490 {
00491 struct xpStack *stack = xp->stack;
00492 if (stack >= xp->stackBufEnd)
00493 xpError(xp, "Extra end tag");
00494 xpParseEndTag(xp, stack->tag->string);
00495 if (inside)
00496 xp->atEndTag(xp->userData, stack->tag->string, stack->text->string);
00497 xp->stack += 1;
00498 if (xp->stack == initialStack)
00499 return TRUE;
00500 }
00501 else
00502 {
00503
00504 struct xpStack *stack = --xp->stack;
00505 if (stack < xp->stackBuf)
00506 xpError(xp, "Stack overflow");
00507 if (stack->tag == NULL)
00508 stack->tag = newDyString(32);
00509 else
00510 dyStringClear(stack->tag);
00511 if (stack->text == NULL)
00512 stack->text = newDyString(256);
00513 else
00514 dyStringClear(stack->text);
00515 text = stack->text;
00516
00517
00518 xpUngetChar(xp);
00519 xpParseStartTag(xp, ArraySize(xp->attDyBuf), stack->tag,
00520 &attCount, xp->attDyBuf, &isClosed);
00521
00522 if (!inside && sameString(stack->tag->string, tag))
00523 {
00524 inside = TRUE;
00525 initialStack = xp->stack + 1;
00526 }
00527
00528
00529 if (inside)
00530 {
00531
00532 for (i=0; i<attCount; ++i)
00533 xp->attBuf[i] = xp->attDyBuf[i]->string;
00534 xp->attBuf[attCount] = NULL;
00535 xp->atStartTag(xp->userData, stack->tag->string, xp->attBuf);
00536 }
00537 if (isClosed)
00538 {
00539 if (inside)
00540 xp->atEndTag(xp->userData, stack->tag->string, stack->text->string);
00541 xp->stack += 1;
00542 if (xp->stack == initialStack)
00543 return TRUE;
00544 }
00545 }
00546 }
00547 }
00548
void xpParse(struct xp *xp)
/* Run the parser from the current position with no tag filter, so the
 * callbacks fire for every element; see xpParseNext() for when it stops.
 * The TRUE/FALSE result of xpParseNext() is discarded. */
{
(void)xpParseNext(xp, NULL);
}
00554