lib/linefile.c

Go to the documentation of this file.
00001 /* lineFile - stuff to rapidly read text files and parse them into
00002  * lines. 
00003  *
00004  * This file is copyright 2002 Jim Kent, but license is hereby
00005  * granted for all use - public, private or commercial. */
00006 
00007 #include "common.h"
00008 #include "hash.h"
00009 #include <fcntl.h>
00010 #include "dystring.h"
00011 #include "errabort.h"
00012 #include "linefile.h"
00013 #include "pipeline.h"
00014 #include <signal.h>
00015 
00016 static char const rcsid[] = "$Id: linefile.c,v 1.54 2007/03/23 08:12:42 kent Exp $"; /* revision identification string in RCS/CVS Id keyword format */
00017 
00018 char *getFileNameFromHdrSig(char *m)
00019 /* Check if header has signature of supported compression stream,
00020    and return a phoney filename for it, or NULL if no sig found. */
00021 {
00022 char buf[20];
00023 char *ext=NULL;
00024 if (startsWith("\x1f\x8b",m)) ext = "gz";
00025 else if (startsWith("\x1f\x9d\x90",m)) ext = "Z";
00026 else if (startsWith("BZ",m)) ext = "bz2";
00027 if (ext==NULL) 
00028     return NULL;
00029 safef(buf, sizeof(buf), "somefile.%s", ext);
00030 return cloneString(buf);
00031 }   
00032 
static char **getDecompressor(char *fileName)
/* Choose the decompression command matching the suffix of fileName.
 * Returns a NULL-terminated argument vector held in static storage,
 * or NULL when the name ends in none of .gz, .Z or .bz2. */
{
static char *gzipCmd[] = {"gzip", "-dc", NULL};
static char *compressCmd[] = {"compress", "-dc", NULL};
static char *bzip2Cmd[] = {"bzip2", "-dc", NULL};
char **cmd = NULL;

if (endsWith(fileName, ".gz"))
    cmd = gzipCmd;
else if (endsWith(fileName, ".Z"))
    cmd = compressCmd;
else if (endsWith(fileName, ".bz2"))
    cmd = bzip2Cmd;
return cmd;
}
00050 
00051 static void metaDataAdd(struct lineFile *lf, char *line)
00052 /* write a line of metaData to output file 
00053  * internal function called by lineFileNext */
00054 {
00055 struct metaOutput *meta = NULL;
00056 
00057 if (lf->isMetaUnique)
00058     {
00059     /* suppress repetition of comments */
00060     if (hashLookup(lf->metaLines, line))
00061         {
00062         return;
00063         }
00064     hashAdd(lf->metaLines, line, NULL);
00065     }
00066 for (meta = lf->metaOutput ; meta != NULL ; meta = meta->next)
00067     if (line != NULL && meta->metaFile != NULL)
00068         fprintf(meta->metaFile,"%s\n", line);
00069 }
00070 
00071 static void metaDataFree(struct lineFile *lf)
00072 /* free saved comments */
00073 {
00074 if (lf->isMetaUnique && lf->metaLines)
00075     freeHash(&lf->metaLines);
00076 }
00077 
00078 void lineFileSetMetaDataOutput(struct lineFile *lf, FILE *f)
00079 /* set file to write meta data to,
00080  * should be called before reading from input file */
00081 {
00082 struct metaOutput *meta = NULL;
00083 if (lf == NULL)
00084     return;
00085 AllocVar(meta);
00086 meta->next = NULL;
00087 meta->metaFile = f;
00088 slAddHead(&lf->metaOutput, meta);
00089 }
00090 
00091 void lineFileSetUniqueMetaData(struct lineFile *lf)
00092 /* suppress duplicate lines in metadata */
00093 {
00094 lf->isMetaUnique = TRUE;
00095 lf->metaLines = hashNew(8);
00096 }
00097 
static char * headerBytes(char *fileName, int numbytes)
/* Return the first numbytes bytes of the file as a zero-terminated
 * buffer which should be freed by the caller.  Returns NULL when the
 * file cannot be opened, a read fails, or the file holds fewer than
 * numbytes bytes. */
{
int fd;
int total = 0;
char *result = NULL;

fd = open(fileName, O_RDONLY);
if (fd < 0)
    return NULL;
result = needMem(numbytes+1);
/* A single read() is allowed to return fewer bytes than requested, so
 * keep going until the request is met, end of file, or an error. */
while (total < numbytes)
    {
    int got = read(fd, result + total, numbytes - total);
    if (got <= 0)
        break;
    total += got;
    }
if (total < numbytes)
    freez(&result);  /* file too short (or read error): can't read numbytes */
else
    result[numbytes] = 0;
close(fd);
return result;
}
00115      
00116 
00117 struct lineFile *lineFileDecompress(char *fileName, bool zTerm)
00118 /* open a linefile with decompression */
00119 {
00120 struct pipeline *pl;
00121 struct lineFile *lf;
00122 char *testName = NULL;
00123 char *testbytes = NULL;    /* the header signatures for .gz .bz2, .Z are all 2 or 3 bytes only */
00124 if (fileName==NULL)
00125   return NULL;
00126 testbytes=headerBytes(fileName,3);
00127 if (!testbytes)
00128     return NULL;  /* avoid error from pipeline */
00129 testName=getFileNameFromHdrSig(testbytes);
00130 freez(&testbytes);
00131 if (!testName)
00132     return NULL;  /* avoid error from pipeline */
00133 pl = pipelineOpen1(getDecompressor(fileName), pipelineRead, fileName, NULL);
00134 lf = lineFileAttach(fileName, zTerm, pipelineFd(pl));
00135 lf->pl = pl;
00136 return lf;
00137 }
00138 
00139 struct lineFile *lineFileDecompressFd(char *name, bool zTerm, int fd)
00140 /* open a linefile with decompression from a file or socket descriptor */
00141 {
00142 struct pipeline *pl;
00143 struct lineFile *lf;
00144 pl = pipelineOpenFd1(getDecompressor(name), pipelineRead, fd, STDERR_FILENO);
00145 lf = lineFileAttach(name, zTerm, pipelineFd(pl));
00146 lf->pl = pl;
00147 return lf;
00148 }
00149 
00150 
00151 
00152 struct lineFile *lineFileDecompressMem(bool zTerm, char *mem, long size)
00153 /* open a linefile with decompression from a memory stream */
00154 {
00155 struct pipeline *pl;
00156 struct lineFile *lf;
00157 char *fileName = getFileNameFromHdrSig(mem);
00158 if (fileName==NULL)
00159   return NULL;
00160 pl = pipelineOpenMem1(getDecompressor(fileName), pipelineRead, mem, size, STDERR_FILENO);
00161 lf = lineFileAttach(fileName, zTerm, pipelineFd(pl));
00162 lf->pl = pl;
00163 return lf;
00164 }
00165 
00166 
00167 
00168 struct lineFile *lineFileAttach(char *fileName, bool zTerm, int fd)
00169 /* Wrap a line file around an open'd file. */
00170 {
00171 struct lineFile *lf;
00172 AllocVar(lf);
00173 lf->fileName = cloneString(fileName);
00174 lf->fd = fd;
00175 lf->bufSize = 64*1024;
00176 lf->zTerm = zTerm;
00177 lf->buf = needMem(lf->bufSize+1);
00178 return lf;
00179 }
00180 
00181 struct lineFile *lineFileOnString(char *name, bool zTerm, char *s)
00182 /* Wrap a line file object around string in memory. This buffer
00183  * have zeroes written into it and be freed when the line file
00184  * is closed. */
00185 {
00186 struct lineFile *lf;
00187 AllocVar(lf);
00188 lf->fileName = cloneString(name);
00189 lf->fd = -1;
00190 lf->bufSize = lf->bytesInBuf = strlen(s);
00191 lf->zTerm = zTerm;
00192 lf->buf = s;
00193 return lf;
00194 }
00195 
00196 void lineFileExpandBuf(struct lineFile *lf, int newSize)
00197 /* Expand line file buffer. */
00198 {
00199 assert(newSize > lf->bufSize);
00200 lf->buf = needMoreMem(lf->buf, lf->bytesInBuf, newSize);
00201 lf->bufSize = newSize;
00202 }
00203 
00204 
00205 struct lineFile *lineFileStdin(bool zTerm)
00206 /* Wrap a line file around stdin. */
00207 {
00208 return lineFileAttach("stdin", zTerm, fileno(stdin));
00209 }
00210 
00211 struct lineFile *lineFileMayOpen(char *fileName, bool zTerm)
00212 /* Try and open up a lineFile. */
00213 {
00214 if (sameString(fileName, "stdin"))
00215     return lineFileStdin(zTerm);
00216 else if (getDecompressor(fileName) != NULL)
00217     return lineFileDecompress(fileName, zTerm);
00218 else
00219     {
00220     int fd = open(fileName, O_RDONLY);
00221     if (fd == -1)
00222         return NULL;
00223     return lineFileAttach(fileName, zTerm, fd);
00224     }
00225 }
00226 
00227 struct lineFile *lineFileOpen(char *fileName, bool zTerm)
00228 /* Open up a lineFile or die trying. */
00229 {
00230 struct lineFile *lf = lineFileMayOpen(fileName, zTerm);
00231 if (lf == NULL)
00232     errAbort("Couldn't open %s , %s", fileName, strerror(errno));
00233 return lf;
00234 }
00235 
00236 void lineFileReuse(struct lineFile *lf)
00237 /* Reuse current line. */
00238 {
00239 lf->reuse = TRUE;
00240 }
00241 
00242 
00243 void lineFileSeek(struct lineFile *lf, off_t offset, int whence)
00244 /* Seek to read next line from given position. */
00245 {
00246 if (lf->pl != NULL)
00247     errnoAbort("Can't lineFileSeek on a compressed file: %s", lf->fileName);
00248 lf->reuse = FALSE;
00249 if (whence == SEEK_SET && offset >= lf->bufOffsetInFile 
00250         && offset < lf->bufOffsetInFile + lf->bytesInBuf)
00251     {
00252     lf->lineStart = lf->lineEnd = offset - lf->bufOffsetInFile;
00253     }
00254 else
00255     {
00256     lf->lineStart = lf->lineEnd = lf->bytesInBuf = 0;
00257     if ((lf->bufOffsetInFile = lseek(lf->fd, offset, whence)) == -1)
00258         errnoAbort("Couldn't lineFileSeek %s", lf->fileName);
00259     }
00260 }
00261 
int lineFileLongNetRead(int fd, char *buf, int size)
/* Keep reading from fd until size bytes have been read, no new
 * characters arrive (end of input), or a read error occurs.
 * A read that was merely interrupted by a signal (EINTR) is retried
 * instead of being mistaken for end of input.
 * Returns the number of bytes stored in buf, which may be below size. */
{
int totalRead = 0;

while (size > 0)
    {
    int oneSize = read(fd, buf, size);
    if (oneSize < 0 && errno == EINTR)
        continue;   /* interrupted before any data was transferred */
    if (oneSize <= 0)
        break;      /* end of input or a genuine error */
    totalRead += oneSize;
    buf += oneSize;
    size -= oneSize;
    }
return totalRead;
}
00279 
00280 static void determineNlType(struct lineFile *lf, char *buf, int bufSize)
00281 /* determine type of newline used for the file, assumes buffer not empty */
00282 {
00283 char *c = buf;
00284 if (bufSize==0) return;
00285 if (lf->nlType != nlt_undet) return;  /* if already determined just exit */
00286 lf->nlType = nlt_unix;  /* start with default of unix lf type */
00287 while (c < buf+bufSize)
00288     {
00289     if (*c=='\r')
00290         {
00291         lf->nlType = nlt_mac;
00292         if (++c < buf+bufSize) 
00293             if (*c == '\n') 
00294                 lf->nlType = nlt_dos;
00295         return;
00296         }
00297     if (*(c++) == '\n')
00298         {
00299         return;
00300         }
00301     }
00302 }
00303 
00304 boolean lineFileNext(struct lineFile *lf, char **retStart, int *retSize)
00305 /* Fetch next line from file. */
00306 {
00307 char *buf = lf->buf;
00308 int bytesInBuf = lf->bytesInBuf;
00309 int endIx = lf->lineEnd;
00310 boolean gotLf = FALSE;
00311 int newStart;
00312 
00313 if (lf->reuse)
00314     {
00315     lf->reuse = FALSE;
00316     if (retSize != NULL)
00317         *retSize = lf->lineEnd - lf->lineStart;
00318     *retStart = buf + lf->lineStart;
00319     if (lf->metaOutput && *retStart[0] == '#') 
00320         metaDataAdd(lf, *retStart); 
00321     return TRUE;
00322     }
00323 
00324 determineNlType(lf, buf+endIx, bytesInBuf);
00325 
00326 /* Find next end of line in buffer. */
00327 switch(lf->nlType)
00328     {
00329     case nlt_unix:
00330     case nlt_dos:
00331         for (endIx = lf->lineEnd; endIx < bytesInBuf; ++endIx)
00332             {
00333             if (buf[endIx] == '\n')
00334                 {
00335                 gotLf = TRUE;
00336                 endIx += 1;
00337                 break;
00338                 }
00339             }
00340         break;
00341     case nlt_mac:
00342         for (endIx = lf->lineEnd; endIx < bytesInBuf; ++endIx)
00343             {
00344             if (buf[endIx] == '\r')
00345                 {
00346                 gotLf = TRUE;
00347                 endIx += 1;
00348                 break;
00349                 }
00350             }
00351         break;
00352     case nlt_undet:
00353         break;
00354     }
00355 
00356 /* If not in buffer read in a new buffer's worth. */
00357 while (!gotLf)
00358     {
00359     int oldEnd = lf->lineEnd;
00360     int sizeLeft = bytesInBuf - oldEnd;
00361     int bufSize = lf->bufSize;
00362     int readSize = bufSize - sizeLeft;
00363 
00364     if (oldEnd > 0 && sizeLeft > 0)
00365         {
00366         memmove(buf, buf+oldEnd, sizeLeft);
00367         }
00368     lf->bufOffsetInFile += oldEnd;
00369     if (lf->fd >= 0)
00370         readSize = lineFileLongNetRead(lf->fd, buf+sizeLeft, readSize);
00371     else
00372         readSize = 0;
00373 
00374     if ((readSize == 0) && (endIx > oldEnd))
00375         {
00376         endIx = sizeLeft;
00377         buf[endIx] = 0;
00378         lf->bytesInBuf = newStart = lf->lineStart = 0;
00379         lf->lineEnd = endIx;
00380         ++lf->lineIx;
00381         if (retSize != NULL)
00382             *retSize = endIx - newStart;
00383         *retStart = buf + newStart;
00384         if (*retStart[0] == '#')
00385             metaDataAdd(lf, *retStart);
00386         return TRUE;
00387         }
00388     else if (readSize <= 0)
00389         {
00390         lf->bytesInBuf = lf->lineStart = lf->lineEnd = 0;
00391         return FALSE;
00392         }
00393     bytesInBuf = lf->bytesInBuf = readSize + sizeLeft;
00394     lf->lineEnd = 0;
00395 
00396     determineNlType(lf, buf+endIx, bytesInBuf);
00397         
00398     /* Look for next end of line.  */
00399     switch(lf->nlType)
00400         {
00401         case nlt_unix:
00402         case nlt_dos:
00403             for (endIx = sizeLeft; endIx <bytesInBuf; ++endIx)
00404                 {
00405                 if (buf[endIx] == '\n')
00406                     {
00407                     endIx += 1;
00408                     gotLf = TRUE;
00409                     break;
00410                     }
00411                 }
00412             break;
00413         case nlt_mac:
00414             for (endIx = sizeLeft; endIx <bytesInBuf; ++endIx)
00415                 {
00416                 if (buf[endIx] == '\r')
00417                     {
00418                     endIx += 1;
00419                     gotLf = TRUE;
00420                     break;
00421                     }
00422                 }
00423             break;
00424         case nlt_undet:
00425             break;
00426         }
00427     if (!gotLf && bytesInBuf == lf->bufSize)
00428         {
00429         if (bufSize >= 512*1024*1024)
00430             {
00431             errAbort("Line too long (more than %d chars) line %d of %s",
00432                 lf->bufSize, lf->lineIx+1, lf->fileName);
00433             }
00434         else
00435             {
00436             lineFileExpandBuf(lf, bufSize*2);
00437             buf = lf->buf;
00438             }
00439         }
00440     }
00441 
00442 if (lf->zTerm)
00443     {
00444     buf[endIx-1] = 0;
00445     if ((lf->nlType == nlt_dos) && (buf[endIx-2]=='\r'))
00446         {
00447         buf[endIx-2] = 0;
00448         }
00449     }
00450 
00451 lf->lineStart = newStart = lf->lineEnd;
00452 lf->lineEnd = endIx;
00453 ++lf->lineIx;
00454 if (retSize != NULL)
00455     *retSize = endIx - newStart;
00456 *retStart = buf + newStart;
00457 if (*retStart[0] == '#')
00458     metaDataAdd(lf, *retStart);
00459 return TRUE;
00460 }
00461 
00462 void lineFileVaAbort(struct lineFile *lf, char *format, va_list args)
00463 /* Print file name, line number, and error message, and abort. */
00464 {
00465 struct dyString *dy = dyStringNew(0);
00466 dyStringPrintf(dy,  "Error line %d of %s: ", lf->lineIx, lf->fileName);
00467 dyStringVaPrintf(dy, format, args);
00468 errAbort("%s", dy->string);
00469 dyStringFree(&dy);
00470 }
00471 
void lineFileAbort(struct lineFile *lf, char *format, ...)
/* Variadic front end to lineFileVaAbort: report file name, line number
 * and the formatted message, then abort. */
{
va_list ap;

va_start(ap, format);
lineFileVaAbort(lf, format, ap);
va_end(ap);
}
00480 
00481 void lineFileUnexpectedEnd(struct lineFile *lf)
00482 /* Complain about unexpected end of file. */
00483 {
00484 errAbort("Unexpected end of file in %s", lf->fileName);
00485 }
00486 
00487 void lineFileNeedNext(struct lineFile *lf, char **retStart, int *retSize)
00488 /* Fetch next line from file.  Squawk and die if it's not there. */
00489 {
00490 if (!lineFileNext(lf, retStart, retSize))
00491     lineFileUnexpectedEnd(lf);
00492 }
00493 
00494 void lineFileClose(struct lineFile **pLf)
00495 /* Close up a line file. */
00496 {
00497 struct lineFile *lf;
00498 if ((lf = *pLf) != NULL)
00499     {
00500     if (lf->pl != NULL)
00501         {
00502         pipelineWait(lf->pl);
00503         pipelineFree(&lf->pl);
00504         }
00505     else if (lf->fd > 0 && lf->fd != fileno(stdin))
00506         {
00507         close(lf->fd);
00508         freeMem(lf->buf);
00509         }
00510     freeMem(lf->fileName);
00511     metaDataFree(lf);
00512     freez(pLf);
00513     }
00514 }
00515 
00516 void lineFileCloseList(struct lineFile **pList)
00517 /* Close up a list of line files. */
00518 {
00519 struct lineFile *el, *next;
00520 
00521 for (el = *pList; el != NULL; el = next)
00522     {
00523     next = el->next;
00524     lineFileClose(&el);
00525     }
00526 *pList = NULL;
00527 }
00528 
00529 void lineFileExpectWords(struct lineFile *lf, int expecting, int got)
00530 /* Check line has right number of words. */
00531 {
00532 if (expecting != got)
00533     errAbort("Expecting %d words line %d of %s got %d", 
00534             expecting, lf->lineIx, lf->fileName, got);
00535 }
00536 
00537 void lineFileExpectAtLeast(struct lineFile *lf, int expecting, int got)
00538 /* Check line has right number of words. */
00539 {
00540 if (got < expecting)
00541     errAbort("Expecting at least %d words line %d of %s got %d", 
00542             expecting, lf->lineIx, lf->fileName, got);
00543 }
00544 
00545 void lineFileShort(struct lineFile *lf)
00546 /* Complain that line is too short. */
00547 {
00548 errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
00549 }
00550 
00551 boolean lineFileNextReal(struct lineFile *lf, char **retStart)
00552 /* Fetch next line from file that is not blank and 
00553  * does not start with a '#'. */
00554 {
00555 char *s, c;
00556 while (lineFileNext(lf, retStart, NULL))
00557     {
00558     s = skipLeadingSpaces(*retStart);
00559     c = s[0];
00560     if (c != 0 && c != '#')
00561         {
00562         return TRUE;
00563         }
00564     }
00565 return FALSE;
00566 }
00567 
int lineFileChopNext(struct lineFile *lf, char *words[], int maxWords)
/* Read lines until one that does not start with '#' yields at least one
 * word when chopped by white space.  Returns the word count of that
 * line, or 0 at end of input. */
{
char *line;
int lineSize;
int wordCount = 0;

while (wordCount == 0 && lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] != '#')
        wordCount = chopByWhite(line, words, maxWords);
    }
return wordCount;
}
00584 
int lineFileChopCharNext(struct lineFile *lf, char sep, char *words[], int maxWords)
/* Read lines until one that does not start with '#' yields at least one
 * word when chopped at the sep character.  Returns the word count of
 * that line, or 0 at end of input. */
{
char *line;
int lineSize;

for (;;)
    {
    int wordCount;

    if (!lineFileNext(lf, &line, &lineSize))
        return 0;
    if (line[0] == '#')
        continue;
    wordCount = chopByChar(line, sep, words, maxWords);
    if (wordCount != 0)
        return wordCount;
    }
}
00602 
int lineFileChopNextTab(struct lineFile *lf, char *words[], int maxWords)
/* Tab flavour of lineFileChopCharNext: return the next line that does
 * not start with '#' and is not blank, chopped into words at tabs.
 * Returns 0 at end of input. */
{
return lineFileChopCharNext(lf, '\t', words, maxWords);
}
00620 
00621 boolean lineFileNextCharRow(struct lineFile *lf, char sep, char *words[], int wordCount)
00622 /* Return next non-blank line that doesn't start with '#' chopped into words
00623  * delimited by sep. Returns FALSE at EOF.  Aborts on error. */
00624 {
00625 int wordsRead;
00626 wordsRead = lineFileChopCharNext(lf, sep, words, wordCount);
00627 if (wordsRead == 0)
00628     return FALSE;
00629 if (wordsRead < wordCount)
00630     lineFileExpectWords(lf, wordCount, wordsRead);
00631 return TRUE;
00632 }
00633 
00634 boolean lineFileNextRow(struct lineFile *lf, char *words[], int wordCount)
00635 /* Return next non-blank line that doesn't start with '#' chopped into words.
00636  * Returns FALSE at EOF.  Aborts on error. */
00637 {
00638 int wordsRead;
00639 wordsRead = lineFileChopNext(lf, words, wordCount);
00640 if (wordsRead == 0)
00641     return FALSE;
00642 if (wordsRead < wordCount)
00643     lineFileExpectWords(lf, wordCount, wordsRead);
00644 return TRUE;
00645 }
00646 
00647 boolean lineFileNextRowTab(struct lineFile *lf, char *words[], int wordCount)
00648 /* Return next non-blank line that doesn't start with '#' chopped into words
00649  * at tabs. Returns FALSE at EOF.  Aborts on error. */
00650 {
00651 int wordsRead;
00652 wordsRead = lineFileChopNextTab(lf, words, wordCount);
00653 if (wordsRead == 0)
00654     return FALSE;
00655 if (wordsRead < wordCount)
00656     lineFileExpectWords(lf, wordCount, wordsRead);
00657 return TRUE;
00658 }
00659 
00660 int lineFileNeedFullNum(struct lineFile *lf, char *words[], int wordIx)
00661 /* Make sure that words[wordIx] is an ascii integer, and return
00662  * binary representation of it. Require all chars in word to be digits.*/
00663 {
00664 char *c;
00665 for (c = words[wordIx]; *c; c++)
00666     {
00667     if (*c == '-' || isdigit(*c))
00668         /* NOTE: embedded '-' will be caught by lineFileNeedNum */
00669         continue;
00670     errAbort("Expecting number field %d line %d of %s, got %s", 
00671             wordIx+1, lf->lineIx, lf->fileName, words[wordIx]);
00672     }
00673 return lineFileNeedNum(lf, words, wordIx);
00674 }
00675 
00676 int lineFileNeedNum(struct lineFile *lf, char *words[], int wordIx)
00677 /* Make sure that words[wordIx] is an ascii integer, and return
00678  * binary representation of it. Conversion stops at first non-digit char. */
00679 {
00680 char *ascii = words[wordIx];
00681 char c = ascii[0];
00682 if (c != '-' && !isdigit(c))
00683     errAbort("Expecting number field %d line %d of %s, got %s", 
00684         wordIx+1, lf->lineIx, lf->fileName, ascii);
00685 return atoi(ascii);
00686 }
00687 
00688 double lineFileNeedDouble(struct lineFile *lf, char *words[], int wordIx)
00689 /* Make sure that words[wordIx] is an ascii double value, and return
00690  * binary representation of it. */
00691 {
00692 char *valEnd;
00693 char *val = words[wordIx];
00694 double doubleValue;
00695 
00696 doubleValue = strtod(val, &valEnd);
00697 if ((*val == '\0') || (*valEnd != '\0'))
00698     errAbort("Expecting double field %d line %d of %s, got %s",
00699         wordIx+1, lf->lineIx, lf->fileName, val);
00700 return doubleValue;
00701 }
00702 
00703 void lineFileSkip(struct lineFile *lf, int lineCount)
00704 /* Skip a number of lines. */
00705 {
00706 int i, lineSize;
00707 char *line;
00708 
00709 for (i=0; i<lineCount; ++i)
00710     {
00711     if (!lineFileNext(lf, &line, &lineSize))
00712         errAbort("Premature end of file in %s", lf->fileName);
00713     }
00714 }
00715 
char *lineFileSkipToLineStartingWith(struct lineFile *lf, char *start, int maxCount)
/* Read lines until one begins with the string start and return it.
 * Returns NULL when the input ends, or once more than maxCount lines
 * have been read, without finding such a line. */
{
char *line;

for (;;)
    {
    if (!lineFileNext(lf, &line, NULL))
        break;
    if (--maxCount < 0)
        break;
    if (startsWith(start, line))
        return line;
    }
return NULL;
}
00728 
00729 boolean lineFileParseHttpHeader(struct lineFile *lf, char **hdr,
00730                                 boolean *chunked, int *contentLength)
00731 /* Extract HTTP response header from lf into hdr, tell if it's 
00732  * "Transfer-Encoding: chunked" or if it has a contentLength. */
00733 {
00734   struct dyString *header = newDyString(1024);
00735   char *line;
00736   int lineSize;
00737 
00738   if (chunked != NULL)
00739     *chunked = FALSE;
00740   if (contentLength != NULL)
00741     *contentLength = -1;
00742   dyStringClear(header);
00743   if (lineFileNext(lf, &line, &lineSize))
00744     {
00745       if (startsWith("HTTP/", line))
00746         {
00747         char *version, *code;
00748         dyStringAppendN(header, line, lineSize-1);
00749         dyStringAppendC(header, '\n');
00750         version = nextWord(&line);
00751         code = nextWord(&line);
00752         if (code == NULL)
00753             {
00754             warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
00755             *hdr = cloneString(header->string);
00756             dyStringFree(&header);
00757             return FALSE;
00758             }
00759         if (!sameString(code, "200"))
00760             {
00761             warn("%s: Errored HTTP response header: %s %s %s\n", lf->fileName, version, code, line);
00762             *hdr = cloneString(header->string);
00763             dyStringFree(&header);
00764             return FALSE;
00765             }
00766         while (lineFileNext(lf, &line, &lineSize))
00767             {
00768             /* blank line means end of HTTP header */
00769             if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
00770                 break;
00771             if (strstr(line, "Transfer-Encoding: chunked") && chunked != NULL)
00772                 *chunked = TRUE;
00773             dyStringAppendN(header, line, lineSize-1);
00774             dyStringAppendC(header, '\n');
00775             if (strstr(line, "Content-Length:"))
00776               {
00777                 code = nextWord(&line);
00778                 code = nextWord(&line);
00779                 if (contentLength != NULL)
00780                     *contentLength = atoi(code);
00781               }
00782             }
00783         }
00784       else
00785         {
00786           /* put the line back, don't put it in header/hdr */
00787           lineFileReuse(lf);
00788           warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
00789           *hdr = cloneString(header->string);
00790           dyStringFree(&header);
00791           return FALSE;
00792         }
00793     }
00794   else
00795     {
00796       *hdr = cloneString(header->string);
00797       dyStringFree(&header);
00798       return FALSE;
00799     }
00800 
00801   *hdr = cloneString(header->string);
00802   dyStringFree(&header);
00803   return TRUE;
00804 } /* lineFileParseHttpHeader */
00805 
00806 struct dyString *lineFileSlurpHttpBody(struct lineFile *lf,
00807                                        boolean chunked, int contentLength)
00808 /* Return a dyString that contains the http response body in lf.  Handle 
00809  * chunk-encoding and content-length. */
00810 {
00811   struct dyString *body = newDyString(64*1024);
00812   char *line;
00813   int lineSize;
00814 
00815   dyStringClear(body);
00816   if (chunked)
00817     {
00818       /* Handle "Transfer-Encoding: chunked" body */
00819       /* Procedure from RFC2068 section 19.4.6 */
00820       char *csword;
00821       unsigned chunkSize = 0;
00822       unsigned size;
00823       do
00824         {
00825           /* Read line that has chunk size (in hex) as first word. */
00826           if (lineFileNext(lf, &line, NULL))
00827             csword = nextWord(&line);
00828           else break;
00829           if (sscanf(csword, "%x", &chunkSize) < 1)
00830             {
00831               warn("%s: chunked transfer-encoding chunk size parse error.\n",
00832                    lf->fileName);
00833               break;
00834             }
00835           /* If chunk size is 0, read in a blank line & then we're done. */
00836           if (chunkSize == 0)
00837             {
00838               lineFileNext(lf, &line, NULL);
00839               if (line == NULL || (line[0] != '\r' && line[0] != 0))
00840                 warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
00841                      lf->fileName, line);
00842               
00843               break;
00844             }
00845           /* Read (and save) lines until we have read in chunk. */
00846           for (size = 0;  size < chunkSize;  size += lineSize)
00847             {
00848               if (! lineFileNext(lf, &line, &lineSize))
00849                 break;
00850               dyStringAppendN(body, line, lineSize-1);
00851               dyStringAppendC(body, '\n');
00852             }
00853           /* Read blank line - or extra CRLF inserted in the middle of the 
00854            * current line, in which case we need to trim it. */
00855           if (size > chunkSize)
00856             {
00857               body->stringSize -= (size - chunkSize);
00858               body->string[body->stringSize] = 0;
00859             }
00860           else if (size == chunkSize)
00861             {
00862               lineFileNext(lf, &line, NULL);
00863               if (line == NULL || (line[0] != '\r' && line[0] != 0))
00864                 warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
00865                      lf->fileName, line);
00866             }
00867         } while (chunkSize > 0);
00868       /* Try to read in next line.  If it's an HTTP header, put it back. */
00869       /* If there is a next line but it's not an HTTP header, it's a footer. */
00870       if (lineFileNext(lf, &line, NULL))
00871         {
00872           if (startsWith("HTTP/", line))
00873             lineFileReuse(lf);
00874           else
00875             {
00876               /* Got a footer -- keep reading until blank line */
00877               warn("%s: chunked transfer-encoding: got footer %s, discarding it.\n",
00878                    lf->fileName, line);
00879               while (lineFileNext(lf, &line, NULL))
00880                 {
00881                   if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
00882                     break;
00883                   warn("discarding footer line: %s\n", line);
00884                 }
00885             }
00886         }
00887     }
00888   else if (contentLength >= 0)
00889     {
00890       /* Read in known length */
00891       int size;
00892       for (size = 0;  size < contentLength;  size += lineSize)
00893         {
00894           if (! lineFileNext(lf, &line, &lineSize))
00895             break;
00896           dyStringAppendN(body, line, lineSize-1);
00897           dyStringAppendC(body, '\n');
00898         }
00899     }
00900   else
00901     {
00902       /* Read in to end of file (assume it's not a persistent connection) */
00903       while (lineFileNext(lf, &line, &lineSize))
00904         {
00905           dyStringAppendN(body, line, lineSize-1);
00906           dyStringAppendC(body, '\n');
00907         }
00908     }
00909 
00910   return(body);
00911 } /* lineFileSlurpHttpBody */
00912 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2