lib/mime.c

Go to the documentation of this file.
00001 /* Routines for processing MIME from a descriptor.
00002  *   For cgi post, the MIME descriptor is stdin.
00003  *   We want to parse it as it comes in so that 
00004  *   we can handle very large files if needed.
00005  *   Large data are saved to tempfiles.
00006  *   Small data stays as ptr+size in memory.
00007  *
00008  * This file is copyright 2005 Jim Kent, but license is hereby
00009  * granted for all use - public, private or commercial. */
00010 
00011 #include "common.h"
00012 #include "hash.h"
00013 #include "linefile.h"
00014 #include "cheapcgi.h"
00015 #include "portable.h"
00016 #include "errabort.h"
00017 #include "mime.h"
00018 
/* Revision-control "$Id$" keyword string for this file, kept in a static
 * array; nothing in this file reads it. */
static char const rcsid[] = "$Id: mime.c,v 1.13 2006/06/20 18:31:24 galt Exp $";
00020 /* 
00021  * Note: MIME is a nested structure that makes a tree that streams in depth-first.
00022  */
00023 
/* Size limits used by the MIME parser.  Each expression is parenthesized so
 * the macro expands to a single value no matter which operators surround it
 * at the point of use. */
#define MAXPARTSIZE (512*1024*1024)  /* max size before gets put in a tempfile to save memory */
#define MAXPARTLINESIZE 1024 /* header lines should be small, so bad if bigger than this */
#define MAXDATASIZE (64LL*1024*1024*1024) /* max size allowable for large uploads */
#define MAXBOUNDARY (72+5)     /* max size of buffer for boundary 72+--""0 */
00028 
/* Newline convention of the MIME input, shared by every parse in this file.
 * Starts out undetermined; getLineMB() fixes it from the first line ending it
 * reads, readPartHeaderMB() may copy it from the alternate-header lineFile,
 * and getNewLineByType() turns it into the matching newline string. */
enum nlType nlType = nlt_undet;
00030 
00031 static void setEopMB(struct mimeBuf *b)
00032 /* do a search for boundary, set eop End Of Part if found */
00033 {
00034 if (b->blen > 0)
00035     b->eop = memMatch(b->boundary, b->blen, b->i, b->eoi - b->i);
00036 else
00037     b->eop = NULL;
00038 }
00039 
00040 static void setEodMB(struct mimeBuf *b)
00041 /* set end of data - eoi minus (boundary-size -1) */
00042 {
00043 if (b->blen > 1 && b->eoi == b->eom) 
00044     {
00045     b->eod = b->eoi - (b->blen-1);
00046     }
00047 else
00048     {
00049     b->eod = b->eoi;
00050     }
00051 }
00052 
00053 static void setBoundaryMB(struct mimeBuf *b, char *boundary)
00054 /* set boundary in b */
00055 {
00056 b->boundary = boundary;
00057 b->blen = boundary ? strlen(b->boundary) : 0;
00058 setEopMB(b);
00059 setEodMB(b);
00060 }
00061 
#ifdef DEBUG
static void dumpMB(struct mimeBuf *b)
/* Debug dump of a mimeBuf to stderr: the buffer pointers (printed as
 * unsigned long), the active boundary and its length, then the entire
 * MIMEBUFSIZE buffer with every non-printable byte shown as '.'. */
{
int i=0;

fprintf(stderr,"b->i  =%lu "
      "b->eop=%lu "
      "b->eod=%lu "
      "b->eoi=%lu "
      "b->eom=%lu "
      "%s "
      "%d "
      "\n", 
    (unsigned long) b->i,
    (unsigned long) b->eop,
    (unsigned long) b->eod,
    (unsigned long) b->eoi,
    (unsigned long) b->eom,
    b->boundary ? b->boundary : "(null)",  /* boundary is NULL until one is set; %s of NULL is undefined */
    b->blen
    );
fprintf(stderr,"*");    
for(i=0;i<MIMEBUFSIZE;++i)
    {
    /* Classify as unsigned so the test does not depend on char signedness;
     * only 0x20..0x7e (printable ASCII) is echoed verbatim. */
    unsigned char c = (unsigned char) b->buf[i];
    fprintf(stderr,"%c", (c < 32 || c > 126) ? '.' : c);
    }
fprintf(stderr,"\n\n");    
}
#endif
00092 
00093 static void moreMimeBuf(struct mimeBuf *b)
00094 {
00095 int bytesRead = 0, bytesToRead = 0;
00096 if (b->blen > 1)
00097     {
00098     int r = b->eoi - b->i;
00099     memmove(b->buf, b->i, r);
00100     b->eoi = b->buf+r;
00101     }
00102 else
00103     {
00104     b->eoi = b->buf;
00105     }
00106 b->i = b->buf+0;
00107 bytesToRead = b->eom - b->eoi;
00108 while (bytesToRead > 0)
00109     {
00110     bytesRead = read(b->d, b->eoi, bytesToRead);
00111     if (bytesRead < 0)
00112         errnoAbort("moreMimeBuf: error reading MIME input descriptor");
00113     b->eoi += bytesRead;
00114     if (bytesRead == 0)
00115         break;
00116     bytesToRead = bytesToRead - bytesRead;
00117     }
00118 setEopMB(b);
00119 setEodMB(b);
00120 //debug
00121 //fprintf(stderr,"post-moreMime dumpMB: ");
00122 //dumpMB(b);  //debug
00123 }
00124 
00125 static char getcMB(struct mimeBuf *b)
00126 /* read just one char from MIME buffer */
00127 {
00128 if (b->i >= b->eoi && b->eoi < b->eom)  /* at end of input */
00129     errAbort("getcMB error - requested input beyond end of MIME input.");
00130 if (b->i >= b->eod && b->eoi == b->eom) /* at end of buffer */
00131     moreMimeBuf(b);
00132     
00133 //fprintf(stderr,"b->buf:%lu b->i:%lu %c \n",
00134 //    (unsigned long) b->buf,
00135 //    (unsigned long) b->i,
00136 //    *b->i
00137 //    );
00138 //fprintf(stderr,"%c",*b->i);
00139 //fflush(stderr); 
00140 return *b->i++;    
00141 }
00142 
00143 static void putBackMB(struct mimeBuf *b)
00144 /* Rewind just one char back in MIME buffer.
00145  * Do not use except for distinguishing line type initially */
00146 {
00147 if (b->i == b->buf)  /* at beginning of buffer */
00148     errAbort("putBackMB error - requested pushback beyond beginning buffer.");
00149 b->i--;    
00150 }
00151 
00152 static char *getLineMB(struct mimeBuf *b)
00153 /* Reads one line up to CRLF, returned string does not include CRLF however. 
00154    Use freeMem when done with string. */
00155 {
00156 char line[MAXPARTLINESIZE];
00157 int i = 0;
00158 char c = 0;
00159 line[0]=0;
00160 while(TRUE)
00161     {
00162     c =getcMB(b);
00163     if ((c == 0x0d) || (c == 0x0a))  /* CR or LF is end of line */
00164         break;
00165     line[i++] = c;
00166     if (i >= MAXPARTLINESIZE)
00167         errAbort("getLineMB error - MIME input header too long, "
00168                     "greater than %d chars",MAXPARTLINESIZE);
00169     }
00170 line[i] = 0; /* terminate string */ 
00171 if (nlType == nlt_undet)  /* determine newline type */
00172     {
00173     if (c == 0x0d)
00174         {
00175         nlType = nlt_mac;
00176         c = getcMB(b);
00177         if (c == 0x0a)
00178             nlType = nlt_dos;
00179         else
00180             putBackMB(b);
00181         }
00182     else
00183         {
00184         nlType = nlt_unix;
00185         }
00186     }
00187 else if (nlType == nlt_dos)
00188     {
00189     if (c == 0x0d)
00190         getcMB(b); /* just waste the LF */
00191     else
00192         nlType = nlt_unix;
00193     }
00194 return cloneString(line);
00195 }
00196 
00197 
00198 static void getChunkMB(struct mimeBuf *b, char **address, int *size, boolean *hasZeros)
00199 /* Pass back address and size of chunk, and whether it contains embedded zeros.
00200    The chunk is the largest piece of data left in the buffer up to the eod or eop. */
00201 {
00202 char *eoc = b->eop ? b->eop : b->eod; /* end of chunk */
00203 //debug
00204 //fprintf(stderr,"pre-getChunkMB dumpMB: ");
00205 //dumpMB(b);  //debug
00206 *address=b->i;
00207 *size=eoc - b->i;
00208 *hasZeros = (memMatch("", 1,*address, *size) != NULL);
00209 b->i = eoc;
00210 }
00211 
00212 static void readPartHeaderMB(struct mimeBuf *b, struct mimePart *p, char *altHeader)
00213 /* Reads the header lines of the mimePart,
00214    saves the header settings in a hash.  */
00215 {
00216 struct dyString *fullLine = dyStringNew(0);
00217 char *key=NULL, *val=NULL;
00218 struct lineFile *lf = NULL;
00219 char *line = NULL;
00220 char *lineAhead = NULL;
00221 int size = 0;
00222 p->hdr = newHash(3);
00223         //debug
00224         //fprintf(stderr,"headers dumpMB: ");
00225         //dumpMB(b);  //debug
00226 if (altHeader)
00227     {
00228     lf = lineFileOnString("MIME Header", TRUE, altHeader);
00229     }
00230 /* read ahead one line, skipping any leading blanks lines */   
00231 do
00232     {
00233     if (altHeader)
00234         lineFileNext(lf, &lineAhead, &size);
00235     else
00236         lineAhead = getLineMB(b);
00237     } 
00238     while (sameString(lineAhead,""));
00239 
00240 do
00241     {
00242     /* accumulate a full header line - some emailers split into mpl lines */
00243     dyStringClear(fullLine);
00244     do 
00245         {
00246         line = lineAhead;
00247         if (altHeader)
00248             lineFileNext(lf, &lineAhead, &size);
00249         else
00250             lineAhead = getLineMB(b);
00251         dyStringAppend(fullLine,line);    
00252         if (!altHeader) 
00253             freez(&line);
00254         } while (isspace(lineAhead[0]));
00255     line = fullLine->string;
00256     //fprintf(stderr,"found a line! [%s]\n",line);  //debug
00257     key = line;
00258     val = strchr(line,':');
00259     if (!val)
00260         errAbort("readPartHeaderMB error - header-line colon not found, line=[%s]",line);
00261     *val = 0;
00262     val++;
00263     key=trimSpaces(key);
00264     // since the hash is case-sensitive, convert to lower case for ease of matching
00265     tolowers(key);  
00266     val=trimSpaces(val);
00267     hashAdd(p->hdr,key,cloneString(val));
00268     
00269     //debug
00270     //fprintf(stderr,"MIME header: key=[%s], val=[%s]\n",key,val);
00271     //fflush(stderr); 
00272     
00273     } while (!sameString(lineAhead,""));
00274 if (altHeader)
00275     {
00276     if (nlType == nlt_undet)
00277         nlType = lf->nlType;
00278     lineFileClose(&lf);
00279     }
00280 else
00281     {
00282     freez(&lineAhead);
00283     }
00284 dyStringFree(&fullLine);
00285     
00286 }
00287 
00288 
00289 struct mimeBuf * initMimeBuf(int d)
00290 /* d is a descriptor for a file or socket or some other descriptor 
00291    that the MIME input can be read from. 
00292    Initializes the mimeBuf structure. */
00293 {
00294 struct mimeBuf *b=AllocA(*b);
00295 b->d = d;
00296 b->boundary = NULL;
00297 b->blen = 0;
00298 b->eom = b->buf+MIMEBUFSIZE;
00299 b->eoi = b->eom;
00300 b->eod = b->eom;
00301 b->i = b->eom;
00302 moreMimeBuf(b);
00303 return b;
00304 }
00305 
char *getMimeHeaderMainVal(char *header)
/* Return a clone of the leading "main value" of a MIME header line: the
 * chars before the first comma, semicolon, colon, white space or the end of
 * the string.  The header is expected to be trimmed of leading and trailing
 * spaces already.  freeMem the result when done.
 * Aborts via errAbort if the value does not fit the 1024-byte work buffer. */
{
char value[1024];
size_t len = strcspn(header, ",;: \t\r\n");   /* length up to first punctuation char */
if (len >= sizeof(value))
    errAbort("error: main value too long (>%lu) in MIME header Content-type:%s",(unsigned long)sizeof(value),header);
memcpy(value, header, len);
value[len] = 0;
return cloneString(value);
}
00331 
00332 
char *getMimeHeaderFieldVal(char *header, char *field)
/* Look for "field=" in a MIME header line and return a clone of its value.
 * A match only counts when field is preceded by a punctuation or white-space
 * char (so never at the very start of header) and followed directly by '='.
 * A value starting with a double quote runs to the closing quote; an unquoted
 * value runs to the next punctuation or white-space char.
 * Returns NULL when the field is not present.  freeMem the result when done.
 * Aborts via errAbort if the value does not fit the 1024-byte work buffer. */
{
char value[1024];
char *stopChars = ",;: \t\r\n";   /* chars that delimit fields and values */
char *scan = header;
size_t len = 0;
for (;;)
    {
    scan = strstr(scan, field);
    if (scan == NULL)
        return NULL;
    if (scan == header || strchr(stopChars, scan[-1]) == NULL)
        {
        /* not at a field start: resume the search one char further on */
        ++scan;
        continue;
        }
    scan += strlen(field);
    if (*scan == '=')
        {
        ++scan;
        break;
        }
    }
if (*scan == '"')
    {
    stopChars = "\"";   /* quoted value: only the closing quote ends it */
    ++scan;
    }
len = strcspn(scan, stopChars);
if (len >= sizeof(value))
    errAbort("error: %s= value too long (>%lu) in MIME header Content-type:%s",field,(unsigned long)sizeof(value),header);
memcpy(value, scan, len);
value[len] = 0;
return cloneString(value);
}
00381 
00382 char *getNewLineByType()
00383 /* just use global nlType setting */
00384 {
00385 switch (nlType)
00386     {
00387     case nlt_dos:
00388         //debug
00389         //fprintf(stderr,"nlType=nlt_dos\n");
00390         return "\x0d\x0a";
00391     case nlt_mac:
00392         //debug
00393         //fprintf(stderr,"nlType=nlt_mac\n");
00394         return "\x0d";
00395     case nlt_unix:
00396     default:
00397         //debug
00398         //fprintf(stderr,"nlType=nlt_unix\n");
00399         return "\x0a";
00400     }
00401 }
00402 
00403 struct mimePart *parseMultiParts(struct mimeBuf *b, char *altHeader)
00404 /* This is a recursive function.  It parses multipart MIME messages.
00405    Data that are binary or too large will be saved in mimePart->filename
00406    otherwise saved as a c-string in mimePart->data.  If multipart,
00407    then first child is mimePart->child, subsequent sibs are in child->next.
00408    altHeader is a string of headers that can be fed in if the headers have
00409    already been read off the stream by an earlier process, i.e. apache.
00410  */
00411 { 
00412 struct mimePart *p=AllocA(*p);
00413 char *parentboundary = NULL, *boundary = NULL;
00414 char *ct = NULL;
00415 boolean autoBoundary = FALSE;
00416 
00417 
00418 //debug
00419 //fprintf(stderr,"altHeader=[%s]\n",altHeader);
00420 
00421 if (sameOk(altHeader, "autoBoundary"))
00422     { /* process things with no explicit header.
00423        *  look for *MIME* \n\n-- */
00424     struct dyString *dy = dyStringNew(0);
00425     char *prevPrevLine = NULL;
00426     char *prevLine = NULL;
00427     char *line = NULL;
00428     boolean found = FALSE;
00429     autoBoundary = TRUE;
00430     while (TRUE)
00431         {
00432         if (b->i >= b->eoi && b->eoi < b->eom)  /* at end of input */
00433             break;
00434         line = getLineMB(b);
00435         if (line && startsWith("--",line) // && 
00436             //sameString(prevLine,"") && 
00437             //prevPrevLine &&
00438             //stringIn("MULTI",prevPrevLine) && 
00439             //stringIn("MIME",prevPrevLine) 
00440             )
00441             {
00442             found = TRUE;
00443             break;
00444             }
00445         freez(&prevPrevLine);
00446         prevPrevLine = prevLine;
00447         prevLine = line;
00448         if (prevPrevLine)
00449             touppers(prevPrevLine);
00450         }
00451     if (!found)
00452         errAbort("autoBoundary: No initial boundary found.");
00453 
00454     dyStringPrintf(dy, "CONTENT-TYPE:multipart/form-data; boundary=%s%s%s", 
00455         line+2, getNewLineByType(), getNewLineByType() );
00456     altHeader = dyStringCannibalize(&dy); 
00457     
00458     //debug
00459     //fprintf(stderr,"autoBoundary altHeader = [%s]\n",altHeader);
00460     //fflush(stderr); 
00461 
00462     freez(&prevPrevLine);           
00463     freez(&prevLine);       
00464     freez(&line);           
00465     }
00466 
00467 //debug
00468 //fprintf(stderr,"\n");
00469 readPartHeaderMB(b,p,altHeader);
00470 
00471 ct = hashFindVal(p->hdr,"content-type");  /* use lowercase key */
00472 //debug
00473 //fprintf(stderr,"ct from hash:%s\n",ct);
00474 //fflush(stderr); 
00475 
00476 if (ct && startsWith("multipart/",ct))
00477     {
00478     char bound[MAXBOUNDARY]; 
00479     char *bnd = NULL;
00480     struct mimePart *child = NULL;
00481 
00482     /* these 3 vars just for processing epilog chunk: */
00483     char *bp=NULL;
00484     int size=0;
00485     boolean hasZeros=FALSE;
00486 
00487     /* save */
00488     parentboundary = b->boundary;
00489 
00490     boundary = getMimeHeaderFieldVal(ct,"boundary");
00491     if (strlen(boundary) >= MAXBOUNDARY)
00492         errAbort("error: boundary= value too long in MIME header Content-type:%s",ct);
00493     safef(bound, sizeof(bound), "--%s",boundary);  /* do not prepend CRLF to boundary yet */
00494     freez(&boundary);
00495     boundary = cloneString(bound);
00496     //debug
00497     //fprintf(stderr,"initial boundary parsed:%s\n",boundary);
00498     //fflush(stderr); 
00499 
00500     if (!autoBoundary)
00501         {
00502         /* skip any extra "prolog" before the initial boundary marker */
00503         while (TRUE)
00504             {
00505             bnd = getLineMB(b);
00506             if (sameString(bnd,boundary)) 
00507                break;
00508             freez(&bnd);
00509             }
00510             //debug
00511             //fprintf(stderr,"initial boundary found:%s\n",bnd);
00512             //fflush(stderr); 
00513         freez(&bnd);
00514         }
00515 
00516     /* include crlf in the boundary so bodies won't have trailing a CRLF
00517      * this is done here so that in case there's no extra CRLF
00518      * between the header and the boundary, it will still work,
00519      * so we only prepend the CRLF to the boundary after initial found */
00520     safef(bound,sizeof(bound),"%s%s", getNewLineByType(), boundary);
00521     freez(&boundary);
00522     boundary=cloneString(bound);
00523     
00524     setBoundaryMB(b, boundary);
00525 
00526     while(TRUE)
00527         {
00528         int i = 0;
00529         char c1 = ' ', c2 = ' ';
00530         child = parseMultiParts(b,NULL);
00531         slAddHead(&p->multi,child);
00532         //call getLine, compare to boundary 
00533         /* skip extra initial boundary marker - it's moot anyway */
00534         freez(&bnd);
00535             //debug
00536             //fprintf(stderr,"post-parse pre-getLineMB dumpMB: ");
00537             //dumpMB(b);  //debug
00538         for (i=0;i<strlen(boundary);++i)
00539             bound[i] = getcMB(b);
00540         bound[i] = 0;    
00541         if (!sameString(bound,boundary))
00542             errAbort("expected boundary %s, but found %s in MIME",boundary,bound);
00543         //debug
00544         //fprintf(stderr,"\nfound boundary:%s\n",bound);
00545         //fflush(stderr); 
00546         c1 = getcMB(b);
00547         if (c1 == '-')
00548             {
00549             c2 = getcMB(b);
00550             if (c2 == '-')
00551                 break;  /* last boundary found */
00552             else                    
00553                 errAbort("expected -- after boundary %s, but found %c%c in MIME",boundary,c1,c2);
00554             }
00555         if (nlType == nlt_dos)
00556             c2 = getcMB(b);
00557         switch (nlType)
00558             {
00559             case nlt_dos:
00560                 if (c1 == 0x0d && c2 == 0x0a)
00561                     break;
00562                 else                
00563                     errAbort("expected CRLF after boundary %s, but found %c%c in MIME",boundary,c1,c2);
00564             case nlt_unix:
00565                 if (c1 == 0x0a)
00566                     break;
00567                 else                
00568                     errAbort("expected LF after boundary %s, but found %c in MIME",boundary,c1);
00569             case nlt_mac:
00570                 if (c1 == 0x0d)
00571                     break;
00572                 else                
00573                     errAbort("expected CR after boundary %s, but found %c in MIME",boundary,c1);
00574             default:
00575                     errAbort("unexpected nlType %d after boundary %s",nlType,boundary);
00576             }
00577         setEopMB(b);
00578         }       
00579     freez(&bnd);
00580     slReverse(&p->multi);
00581     /* restore */
00582     freez(&boundary);
00583     boundary = parentboundary;
00584         //debug
00585         //fprintf(stderr,"restoring parent boundary = %s\n",boundary);
00586     setBoundaryMB(b, boundary);
00587 
00588     /* dump any "epilog" that may be between the 
00589      * end of the child boundary and the parent boundary */
00590     getChunkMB(b, &bp, &size, &hasZeros);
00591     //debug
00592     //fprintf(stderr,"epilog size=%d\n",size);
00593            
00594     
00595     }
00596 else
00597     {
00598     char *bp=NULL;
00599     int size=0;
00600     boolean hasZeros=FALSE;
00601     boolean toobig=FALSE;
00602     boolean asFile=FALSE;
00603     boolean convert=FALSE;
00604     FILE *f = NULL;
00605     struct dyString *dy=newDyString(1024);
00606     //debug
00607     //fprintf(stderr,"starting new part (non-multi), dumpMB: \n");
00608     //dumpMB(b);  //debug
00609     
00610     //debug
00611     //ct = hashFindVal(p->hdr,"content-transfer-encoding");  /* use lowercase key */
00612     //fprintf(stderr,"cte from hash:%s\n",ct);
00613         
00614     while(TRUE)
00615         {
00616         // break if eop, eod, eoi
00617         getChunkMB(b, &bp, &size, &hasZeros);
00618         //debug
00619         //fprintf(stderr,"bp=%lu size=%d, hasZeros=%d \n", 
00620         //    (unsigned long) bp,
00621         //    size,
00622         //    hasZeros);
00623         if (hasZeros)
00624             {
00625             p->binary=TRUE;
00626             }
00627         //if (hasZeros && !asFile)
00628         //    {
00629         //    convert=TRUE;
00630         //    }
00631         if (!asFile && p->size+size > MAXPARTSIZE)
00632             {
00633             toobig = TRUE;
00634             convert=TRUE;
00635             }
00636         if (convert)
00637             {
00638             struct tempName uploadedData;
00639             convert=FALSE;
00640             asFile = TRUE;
00641             makeTempName(&uploadedData, "hgSs", ".cgi");
00642             p->fileName=cloneString(uploadedData.forCgi);
00643             f = mustOpen(p->fileName,"w");
00644             mustWrite(f,dy->string,dy->stringSize);
00645             freeDyString(&dy);
00646             }
00647         if (asFile)
00648             {
00649             mustWrite(f,bp,size);
00650             }
00651         else
00652             {
00653             dyStringAppendN(dy,bp,size);
00654             }
00655         p->size+=size;
00656         if (p->size > MAXDATASIZE)
00657             errAbort("max data size allowable for upload in MIME exceeded %llu",(unsigned long long)MAXDATASIZE);
00658             
00659         
00660         if (b->eop && b->i == b->eop)  /* end of part */
00661             {
00662             break;
00663             }
00664         if (b->i == b->eoi && b->eoi < b->eom) /* end of data */
00665             {
00666             break;
00667             }
00668         moreMimeBuf(b);
00669         }
00670     if (dy)
00671         {
00672         p->data=needLargeMem(dy->stringSize+1);
00673         memcpy(p->data,dy->string,dy->stringSize);
00674         p->data[dy->stringSize] = 0;
00675         freeDyString(&dy);
00676         }
00677     if (f)
00678         carefulClose(&f);
00679 
00680     //debug
00681     //fprintf(stderr,"p->fileName=%s p->data=[%s]\n",p->fileName,p->data);
00682 
00683     }
00684 
00685 return p;
00686 }
00687 
00688 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2