inc/htmlPage.h

Go to the documentation of this file.
00001 /* htmlPage - stuff to read, parse, and submit htmlPages and forms.
00002  *
00003  * typical usage is:
00004  *   struct htmlPage *page = htmlPageGet(url);
00005  *   htmlPageValidateOrAbort(page);
00006  *   var = htmlPageGetVar(page, page->forms, "org");
00007  *   if (var != NULL)
00008  *      printf("Organism = %s\n", var->curVal);
00009  *   htmlPageSetVar(page, page->forms, "org", "Human");
00010  *   newPage = htmlPageFromForm(page, page->forms, "submit", "Go");
00011  */
00012 
00013 #ifndef HTMLPAGE_H
00014 #define HTMLPAGE_H
00015 
00016 #ifndef DYSTRING_H
00017 #include "dystring.h"
00018 #endif
00019 
00020 struct htmlStatus
00021 /* HTTP version and status code, as htmlStatusParse() reads from the first line. */
00022     {
00023     struct htmlStatus *next;    /* Next in list. */
00024     char *version;              /* Usually something like HTTP/1.1 */
00025     int status;                 /* HTTP status code.  200 is good. */
00026     };
00027 
00028 struct htmlCookie
00029 /* A cookie - stored by browser usually.  We need to echo it back when we post
00030  * forms.  Only name and value need to be filled in when sending to a server. */
00031     {
00032     struct htmlCookie *next;    /* Next in list. */
00033     char *name;                 /* Cookie name. */
00034     char *value;                /* Cookie value. */
00035     char *domain;               /* The set of web domains this applies to. */
00036     char *path;                 /* NOTE(review): presumably cookie applies below this path - confirm. */
00037     char *expires;              /* Expiration date. */
00038     boolean secure;             /* Is it a secure cookie? */
00039     };
00040 
00041 struct htmlAttribute
00042 /* An html attribute - part of a set of name/value pairs in a tag. */
00043     {
00044     struct htmlAttribute *next; /* Next in list. */
00045     char *name;         /* Attribute name. */
00046     char *val;          /* Attribute value. */
00047     };
00048 
00049 struct htmlTag
00050 /* A html tag - includes attribute list and start/end pointers, but no text. */
00051     {
00052     struct htmlTag *next; /* Next in list. */
00053     char *name; /* Tag name. */
00054     struct htmlAttribute *attributes;  /* Attribute list. */
00055     char *start;  /* Start of this tag.  Not allocated here. */
00056     char *end;    /* End of tag (one past closing '>').  Not allocated here. */
00057     };
00058 
00059 struct htmlFormVar
00060 /* A variable within an html form - from input, button, etc. */
00061     {
00062     struct htmlFormVar *next;   /* Next in list. */
00063     char *name;                 /* Variable name.  Not allocated here. */
00064     char *tagName;              /* Name of tag.  Not allocated here. */
00065     char *type;                 /* Variable type.  Not allocated here. */
00066     char *curVal;               /* Current value if any.  Allocated here. */
00067     struct slName *values;      /* List of available values.  Null if textBox. */
00068     struct slRef *tags;         /* List of references to associated tags. */
00069     };
00070 
00071 struct htmlForm
00072 /* A form within an html page. */
00073     {
00074     struct htmlForm *next;      /* Next form in list. */
00075     char *name;                 /* Name (n/a if not defined).  Not allocated here. */
00076     char *action;               /* Action attribute value.  Not allocated here. */
00077     char *method;               /* Defaults to "GET".  Not allocated here. */
00078     struct htmlTag *startTag;   /* Tag that holds <FORM>.  Not allocated here. */
00079     struct htmlTag *endTag;     /* Tag one past </FORM>.  Not allocated here. */
00080     struct htmlFormVar *vars; /* List of form variables. */
00081     };
00082 
00083 struct htmlPage
00084 /* A complete html page parsed out. */
00085     {
00086     struct htmlPage *next; /* Next in list. */
00087     char *url;                          /* Url that produced this page. */
00088     struct htmlStatus *status;          /* Version and status. */
00089     struct hash *header;                /* Hash of header lines (cookies, etc.) */
00090     struct htmlCookie *cookies;         /* Associated cookies if any. */
00091     char *fullText;                     /* Full unparsed text including headers. */
00092     char *htmlText;                     /* Unparsed text after header.  Same mem as fullText. */
00093     struct htmlTag *tags;               /* List of tags in this page. */
00094     struct htmlForm *forms;             /* List of all forms. */
00095     };
00096 
00097 void htmlStatusFree(struct htmlStatus **pStatus);
00098 /* Free up resources associated with status. */
00099 
00100 void htmlStatusFreeList(struct htmlStatus **pList);
00101 /* Free a list of dynamically allocated htmlStatus structures. */
00102 
00103 struct htmlStatus *htmlStatusParse(char **pText);
00104 /* Read in status from first line.  Update *pText to point to next line.
00105  * Note unlike many routines here, this does not insert zeros into text. */
00106 
00107 void htmlCookieFree(struct htmlCookie **pCookie);
00108 /* Free memory associated with cookie. */
00109 
00110 void htmlCookieFreeList(struct htmlCookie **pList);
00111 /* Free a list of dynamically allocated htmlCookie structures. */
00112 
00113 struct htmlCookie *htmlCookieFileRead(char *fileName);
00114 /* Read cookies from a line oriented file.  First word in line
00115  * is the cookie name, the rest of the line is the cookie value. */
00116 
00117 void htmlAttributeFree(struct htmlAttribute **pAttribute);
00118 /* Free up resources associated with attribute. */
00119 
00120 void htmlAttributeFreeList(struct htmlAttribute **pList);
00121 /* Free a list of dynamically allocated htmlAttribute structures. */
00122 
00123 char *htmlTagAttributeVal(struct htmlPage *page, struct htmlTag *tag, 
00124         char *name, char *defaultVal);
00125 /* Return value of named attribute of tag, or defaultVal if attribute doesn't exist. */
00126 
00127 char *htmlTagAttributeNeeded(struct htmlPage *page, struct htmlTag *tag, char *name);
00128 /* Return value of named tag attribute.  Complain and return "n/a" if it
00129  * doesn't exist. */
00130 
00131 void htmlTagFree(struct htmlTag **pTag);
00132 /* Free up resources associated with tag. */
00133 
00134 void htmlTagFreeList(struct htmlTag **pList);
00135 /* Free a list of dynamically allocated htmlTag structures. */
00136 
00137 void htmlFormVarFree(struct htmlFormVar **pVar);
00138 /* Free up resources associated with form variable. */
00139 
00140 void htmlFormVarFreeList(struct htmlFormVar **pList);
00141 /* Free a list of dynamically allocated htmlFormVar structures. */
00142 
00143 void htmlFormVarPrint(struct htmlFormVar *var, FILE *f, char *prefix);
00144 /* Print out variable to file, prepending prefix. */
00145 
00146 void htmlFormFree(struct htmlForm **pForm);
00147 /* Free up resources associated with form. */
00148 
00149 void htmlFormFreeList(struct htmlForm **pList);
00150 /* Free a list of dynamically allocated htmlForm structures. */
00151 
00152 void htmlFormPrint(struct htmlForm *form, FILE *f);
00153 /* Print out form structure. */
00154 
00155 char *htmlFormCgiVars(struct htmlPage *page, struct htmlForm *form, 
00156         char *buttonName, char *buttonVal, struct dyString *dyHeader);
00157 /* Return cgi vars in name=val format from user having pressed submit button
00158  * of given name and value.  NOTE(review): role of dyHeader is undocumented - confirm. */
00159 
00160 struct htmlForm *htmlFormGet(struct htmlPage *page, char *name);
00161 /* Get named form from page. */
00162 
00163 struct htmlFormVar *htmlFormVarGet(struct htmlForm *form, char *name);
00164 /* Get named variable from form. */
00165 
00166 void htmlFormVarSet(struct htmlForm *form, char *name, char *val);
00167 /* Set named variable in form to given value. */
00168 
00169 struct htmlFormVar *htmlPageGetVar(struct htmlPage *page, struct htmlForm *form, char *name);
00170 /* Get named variable.  If form is NULL, first form in page is used. */
00171 
00172 void htmlPageSetVar(struct htmlPage *page, struct htmlForm *form, char *name, char *val);
00173 /* Set named variable to given value.  If form is NULL, first form in page is used. */
00174 
00175 void htmlPageFree(struct htmlPage **pPage);
00176 /* Free up resources associated with htmlPage. */
00177 
00178 void htmlPageFreeList(struct htmlPage **pList);
00179 /* Free a list of dynamically allocated htmlPage structures. */
00180 
00181 char *htmlExpandUrl(char *base, char *url);
00182 /* Expand URL that is relative to base to stand on its own.
00183  * Return NULL if it's not http or https. */
00184 
00185 char *htmlNextCrLfLine(char **pS);
00186 /* Return zero-terminated line and advance *pS to start of
00187  * next line.  Return NULL at end of file.  Warn if there is
00188  * no <CR>. */
00189 
00190 struct slName *htmlPageScanAttribute(struct htmlPage *page, 
00191         char *tagName, char *attribute);
00192 /* Scan page for values of particular attribute in particular tag.
00193  * If tagName is NULL then scans in all tags. */
00194 
00195 struct slName *htmlPageLinks(struct htmlPage *page);
00196 /* Scan through tags list and pull out HREF attributes. */
00197 
00198 void htmlPageFormOrAbort(struct htmlPage *page);
00199 /* Aborts if no FORM found. */
00200 
00201 void htmlPageValidateOrAbort(struct htmlPage *page);
00202 /* Do some basic validations.  Aborts if there is a problem. */
00203 
00204 char *htmlSlurpWithCookies(char *url, struct htmlCookie *cookies);
00205 /* Send get message to url with cookies, and return full response as
00206  * a string.  This is not parsed or validated, and includes http
00207  * header lines.  Typically you'd pass this to htmlPageParse() to
00208  * get an actual page. */
00209 
00210 struct htmlPage *htmlPageParse(char *url, char *fullText);
00211 /* Parse out page and return.  Warn and return NULL if problem. */
00212 
00213 struct htmlPage *htmlPageParseOk(char *url, char *fullText);
00214 /* Parse out page and return only if status ok. */
00215 
00216 struct htmlPage *htmlPageParseNoHead(char *url, char *htmlText);
00217 /* Parse out page in memory (past http header if any) and return. */
00218 
00219 struct htmlPage *htmlPageFromForm(struct htmlPage *origPage, struct htmlForm *form, 
00220         char *buttonName, char *buttonVal);
00221 /* Return a new htmlPage based on response to pressing indicated button
00222  * on indicated form in origPage. */
00223 
00224 struct htmlPage *htmlPageGetWithCookies(char *url, struct htmlCookie *cookies);
00225 /* Get page from URL giving server the given cookies.  Note only the
00226  * name and value parts of the cookies need to be filled in. */
00227 
00228 struct htmlPage *htmlPageGet(char *url);
00229 /* Get page from URL (may be a file). */
00230 
00231 struct htmlPage *htmlPageForwarded(char *url, struct htmlCookie *cookies);
00232 /* Get html page.  If it's just a forwarding link then do the
00233  * forwarding.  Cookies is a possibly empty list of cookies with
00234  * name and value parts filled in. */
00235 
00236 struct htmlPage *htmlPageForwardedNoAbort(char *url, struct htmlCookie *cookies);
00237 /* Try to get an HTML page.  Print warning and return NULL if there's a problem. */
00238 #endif /* HTMLPAGE_H */
00239 

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2