lib/kxTok.c

Go to the documentation of this file.
00001 /* kxTok - quick little tokenizer for stuff first
00002  * loaded into memory.  Originally developed for
00003  * "Key eXpression" evaluator. 
00004  *
00005  * This file is copyright 2002 Jim Kent, but license is hereby
00006  * granted for all use - public, private or commercial. */
00007 
00008 #include "common.h"
00009 #include "kxTok.h"
00010 
/* Revision-control identification string for this file. */
static char const rcsid[] = "$Id: kxTok.c,v 1.10 2004/11/05 23:08:58 kent Exp $";

/* When TRUE, quoted string tokens produced by kxTokenizeFancy() keep their
 * surrounding quote characters; when FALSE (the default) the quotes are
 * stripped.  Changed through kxTokIncludeQuotes(). */
boolean includeQuotes = FALSE;
00014 
00015 static struct kxTok *kxTokNew(enum kxTokType type, char *string, int stringSize,
00016         boolean spaceBefore)
00017 /* Allocate and initialize a new token. */
00018 {
00019 struct kxTok *tok;
00020 int totalSize = stringSize + sizeof(*tok);
00021 tok = needMem(totalSize);
00022 tok->type = type;
00023 tok->spaceBefore = spaceBefore;
00024 memcpy(tok->string, string, stringSize);
00025 return tok;
00026 }
00027 
00028 struct kxTok *kxTokenizeFancy(char *text, boolean wildAst,
00029                               boolean wildPercent, boolean includeHyphen)
00030 /* Convert text to stream of tokens. If 'wildAst' is
00031  * TRUE then '*' character will be treated as wildcard
00032  * rather than multiplication sign.  
00033  * If wildPercent is TRUE then the '%' character will be treated as a 
00034  * wildcard (as in SQL) rather than a modulo (kxtMod) or percent sign.
00035  * If includeHyphen is TRUE then a '-' character in the middle of a String 
00036  * token will be treated as a hyphen (part of the String token) instead of 
00037  * a new kxtSub token. */
00038 {
00039 struct kxTok *tokList = NULL, *tok;
00040 char c, *s, *start = NULL, *end = NULL;
00041 enum kxTokType type = 0;
00042 boolean spaceBefore = FALSE;
00043 
00044 s = text;
00045 for (;;)
00046     {
00047     if ((c = *s) == 0)
00048         break;
00049     start = s++;
00050     if (isspace(c))
00051         {
00052         spaceBefore = TRUE;
00053         continue;
00054         }
00055     else if (isalnum(c) || c == '?' || (wildAst && c == '*') ||
00056              (wildPercent && c == '%'))
00057         {
00058         if (c == '?')
00059             type = kxtWildString;
00060         else if (wildAst && c == '*')
00061             type = kxtWildString;
00062         else if (wildPercent && c == '%')
00063             type = kxtWildString;
00064         else
00065             type = kxtString;
00066         for (;;)
00067             {
00068             c = *s;
00069             if (isalnum(c) || c == ':' || c == '_' || c == '.' ||
00070                 (includeHyphen && c == '-'))
00071                 ++s;
00072             else if (c == '?' || (wildAst && c == '*') ||
00073                      (wildPercent && c == '%'))
00074                 {
00075                 type = kxtWildString;
00076                 ++s;
00077                 }
00078             else
00079                 break;
00080             }
00081         end = s;
00082         }
00083     else if (c == '"')
00084         {
00085         type = kxtString;
00086         if (! includeQuotes)
00087             start = s;
00088         for (;;)
00089             {
00090             c = *s++;
00091             if (c == '"')
00092                 break;
00093             if (c == '*' || c == '?' || (wildPercent && c == '%'))
00094                 type = kxtWildString;
00095             }
00096         if (! includeQuotes)
00097             end = s-1;
00098         else
00099             end = s;
00100         }
00101     else if (c == '\'')
00102         {
00103         type = kxtString;
00104         if (! includeQuotes)
00105             start = s;
00106         for (;;)
00107             {
00108             c = *s++;
00109             if (c == '\'')
00110                 break;
00111             if (c == '*' || c == '?' || (wildPercent && c == '%'))
00112                 type = kxtWildString;
00113             }
00114         if (! includeQuotes)
00115             end = s-1;
00116         else
00117             end = s;
00118         } 
00119     else if (c == '=')
00120         {
00121         type = kxtEquals;
00122         end = s;
00123         }
00124     else if (c == '&')
00125         {
00126         type = kxtAnd;
00127         end = s;
00128         }
00129     else if (c == '|')
00130         {
00131         type = kxtOr;
00132         end = s;
00133         }
00134     else if (c == '^')
00135         {
00136         type = kxtXor;
00137         end = s;
00138         }
00139     else if (c == '+')
00140         {
00141         type = kxtAdd;
00142         end = s;
00143         }
00144     else if (c == '-')
00145         {
00146         type = kxtSub;
00147         end = s;
00148         }
00149     else if (c == '*')
00150         {
00151         type = kxtMul;
00152         end = s;
00153         }
00154     else if (c == '/')
00155         {
00156         type = kxtDiv;
00157         end = s;
00158         }
00159     else if (c == '(')
00160         {
00161         type = kxtOpenParen;
00162         end = s;
00163         }
00164     else if (c == ')')
00165         {
00166         type = kxtCloseParen;
00167         end = s;
00168         }
00169     else if (c == '!')
00170         {
00171         type = kxtNot;
00172         end = s;
00173         }
00174     else if (c == '>')
00175         {
00176         if (*s == '=')
00177             {
00178             ++s;
00179             type = kxtGE;
00180             }
00181         else
00182             type = kxtGT;
00183         end = s;
00184         }
00185     else if (c == '<')
00186         {
00187         if (*s == '=')
00188             {
00189             ++s;
00190             type = kxtLE;
00191             }
00192         else
00193             type = kxtLT;
00194         end = s;
00195         }
00196     else if (c == '.')
00197         {
00198         type = kxtDot;
00199         end = s;
00200         }
00201     else if (c == '%')
00202         {
00203         type = kxtMod;
00204         end = s;
00205         }
00206     else if (ispunct(c))
00207         {
00208         type = kxtPunct;
00209         end = s;
00210         }
00211     else
00212         {
00213         errAbort("Unrecognized character %c", c);
00214         }
00215     tok = kxTokNew(type, start, end-start, spaceBefore);
00216     slAddHead(&tokList, tok);
00217     spaceBefore = FALSE;
00218     }
00219 tok = kxTokNew(kxtEnd, "end", 3, spaceBefore);
00220 slAddHead(&tokList, tok);
00221 slReverse(&tokList);
00222 return tokList;
00223 }
00224 
00225 
00226 struct kxTok *kxTokenize(char *text, boolean wildAst)
00227 /* Convert text to stream of tokens. If 'wildAst' is
00228  * TRUE then '*' character will be treated as wildcard
00229  * rather than multiplication sign. */
00230 {
00231 return kxTokenizeFancy(text, wildAst, FALSE, FALSE);
00232 }
00233 
00234 void kxTokIncludeQuotes(boolean val)
00235 /* Pass in TRUE if kxTok should include quote characters in string tokens. */
00236 {
00237 includeQuotes = val;
00238 }
00239 
00240 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2