00001
00002
00003
00004
00005
00006 #include "common.h"
00007 #include "errabort.h"
00008 #include "linefile.h"
00009 #include "tokenizer.h"
00010
00011 static char const rcsid[] = "$Id: tokenizer.c,v 1.3 2004/07/14 05:47:14 kent Exp $";
00012
00013 struct tokenizer *tokenizerOnLineFile(struct lineFile *lf)
00014
00015 {
00016 struct tokenizer *tkz;
00017 AllocVar(tkz);
00018 tkz->sAlloc = 128;
00019 tkz->string = needMem(tkz->sAlloc);
00020 tkz->lf = lf;
00021 tkz->curLine = tkz->linePt = "";
00022 return tkz;
00023 }
00024
00025 struct tokenizer *tokenizerNew(char *fileName)
00026
00027 {
00028 return tokenizerOnLineFile(lineFileOpen(fileName, TRUE));
00029 }
00030
00031 void tokenizerFree(struct tokenizer **pTkz)
00032
00033 {
00034 struct tokenizer *tkz;
00035 if ((tkz = *pTkz) != NULL)
00036 {
00037 freeMem(tkz->string);
00038 lineFileClose(&tkz->lf);
00039 freez(pTkz);
00040 }
00041 }
00042
00043 void tokenizerReuse(struct tokenizer *tkz)
00044
00045 {
00046 tkz->reuse = TRUE;
00047 }
00048
00049 int tokenizerLineCount(struct tokenizer *tkz)
00050
00051 {
00052 return tkz->lf->lineIx;
00053 }
00054
00055 char *tokenizerFileName(struct tokenizer *tkz)
00056
00057 {
00058 return tkz->lf->fileName;
00059 }
00060
00061 char *tokenizerNext(struct tokenizer *tkz)
00062
00063
00064 {
00065 char *start, *end;
00066 char c, *s;
00067 int size;
00068 if (tkz->reuse)
00069 {
00070 tkz->reuse = FALSE;
00071 return tkz->string;
00072 }
00073 for (;;)
00074 {
00075 int lineSize;
00076 s = start = skipLeadingSpaces(tkz->linePt);
00077 if ((c = start[0]) != 0)
00078 {
00079 if (tkz->uncommentC && c == '/')
00080 {
00081 if (start[1] == '/')
00082 ;
00083 else if (start[1] == '*')
00084 {
00085 start += 2;
00086 for (;;)
00087 {
00088 char *end = stringIn("*/", start);
00089 if (end != NULL)
00090 {
00091 tkz->linePt = end+2;
00092 break;
00093 }
00094 if (!lineFileNext(tkz->lf, &tkz->curLine, &lineSize))
00095 errAbort("End of file (%s) in comment", tokenizerFileName(tkz));
00096 start = tkz->curLine;
00097 }
00098 continue;
00099 }
00100 else
00101 break;
00102 }
00103 else if (tkz->uncommentShell && c == '#')
00104 ;
00105 else
00106 break;
00107 }
00108 if (!lineFileNext(tkz->lf, &tkz->curLine, &lineSize))
00109 {
00110 tkz->eof = TRUE;
00111 return NULL;
00112 }
00113 tkz->linePt = tkz->curLine;
00114 }
00115 if (isalnum(c) || (c == '_'))
00116 {
00117 for (;;)
00118 {
00119 s++;
00120 if (!(isalnum(*s) || (*s == '_')))
00121 break;
00122 }
00123 end = s;
00124 }
00125 else if (c == '"' || c == '\'')
00126 {
00127 char quot = c;
00128 if (tkz->leaveQuotes)
00129 start = s++;
00130 else
00131 start = ++s;
00132 for (;;)
00133 {
00134 c = *s;
00135 if (c == quot)
00136 {
00137 if (s[-1] == '\\')
00138 {
00139 if (s >= start+2 && s[-2] == '\\')
00140 break;
00141 }
00142 else
00143 break;
00144 }
00145 else if (c == 0)
00146 {
00147 break;
00148 }
00149 ++s;
00150 }
00151 end = s;
00152 if (c != 0)
00153 ++s;
00154 if (tkz->leaveQuotes)
00155 end += 1;
00156 }
00157 else
00158 {
00159 end = ++s;
00160 }
00161 tkz->linePt = s;
00162 size = end - start;
00163 if (size >= tkz->sAlloc)
00164 {
00165 tkz->sAlloc = size+128;
00166 tkz->string = needMoreMem(tkz->string, 0, tkz->sAlloc);
00167 }
00168 memcpy(tkz->string, start, size);
00169 tkz->string[size] = 0;
00170 return tkz->string;
00171 }
00172
00173
00174 void tokenizerErrAbort(struct tokenizer *tkz, char *format, ...)
00175
00176
00177 {
00178 va_list args;
00179 va_start(args, format);
00180 vaWarn(format, args);
00181 errAbort("line %d of %s:\n%s",
00182 tokenizerLineCount(tkz), tokenizerFileName(tkz), tkz->curLine);
00183 }
00184
00185 void tokenizerNotEnd(struct tokenizer *tkz)
00186
00187 {
00188 if (tkz->eof)
00189 errAbort("Unexpected end of file");
00190 }
00191
void tokenizerMustHaveNext(struct tokenizer *tkz)
/* Advance to the next token with tokenizerNext() and call errAbort() if it
 * returns NULL. */
{
    char *token = tokenizerNext(tkz);

    if (token == NULL)
        errAbort("Unexpected end of file");
}
00198
00199 void tokenizerMustMatch(struct tokenizer *tkz, char *string)
00200
00201
00202 {
00203 if (sameWord(tkz->string, string))
00204 tokenizerMustHaveNext(tkz);
00205 else
00206 tokenizerErrAbort(tkz, "Expecting %s got %s", string, tkz->string);
00207 }
00208