inc/linefile.h

Go to the documentation of this file.
00001 /* lineFile - stuff to rapidly read text files and parse them into
00002  * lines. 
00003  *
00004  * This file is copyright 2002 Jim Kent, but license is hereby
00005  * granted for all use - public, private or commercial. */
00006 
00007 #ifndef LINEFILE_H
00008 #define LINEFILE_H
00009 
00010 enum nlType { /* Kind of line ending used by a file. */
00011  nlt_undet, /* undetermined */
00012  nlt_unix,  /* Unix: LF (line feed) */
00013  nlt_dos,   /* DOS: CR LF (carriage return, line feed) */
00014  nlt_mac    /* Mac: CR (carriage return) */
00015 };
00016 
00017 struct metaOutput
00018 /* Node in a list of file handles that meta data is written to.
00019  * Meta data is the text after a '#'. */
00020     {
00021     struct metaOutput *next;    /* Next file handle in the list. */
00022     FILE *metaFile;             /* File to write metadata to. */
00023     };
00024 
00025 struct lineFile
00026 /* Structure to handle fast, line-oriented file I/O.  Holds a buffer
00027  * of file data and the position of the current line within it. */
00028     {
00029     struct lineFile *next;      /* Next in list; lineFiles might need to be on a list. */
00030     char *fileName;             /* Name of file. */
00031     int fd;                     /* File handle.  -1 for 'memory' files. */
00032     int bufSize;                /* Size of buffer. */
00033     off_t bufOffsetInFile;      /* Offset in file of first buffer byte. */
00034     int bytesInBuf;             /* Bytes read into buffer. */
00035     int reserved;               /* Reserved (zero for now). */
00036     int lineIx;                 /* Current line. */
00037     int lineStart;              /* Offset of current line's start in buf (see lineFileString). */
00038     int lineEnd;                /* Offset of end of line in buffer. */
00039     bool zTerm;                 /* If set, replace '\n' with zero. */
00040     enum nlType nlType;         /* Type of line endings: dos, unix, mac or undet. */
00041     bool reuse;                 /* Set if reusing input (see lineFileReuse). */
00042     char *buf;                  /* Buffer; current line is at buf + lineStart. */
00043     struct pipeline *pl;        /* Pipeline if reading compressed input. */
00044     struct metaOutput *metaOutput;   /* List of FILE handles to write meta data to. */
00045     bool isMetaUnique;          /* If set, do not repeat comments in output. */
00046     struct hash *metaLines;     /* Saved lines, used to suppress repetition. */
00047     };
00048 
00049 char *getFileNameFromHdrSig(char *m);
00050 /* Check if header has signature of supported compression stream,
00051    and return a phoney filename for it, or NULL if no sig found. */
00052 
00053 struct lineFile *lineFileDecompressFd(char *name, bool zTerm, int fd);
00054 /* Open a lineFile with decompression from a file or socket descriptor. */
00055 
00056 struct lineFile *lineFileDecompressMem(bool zTerm, char *mem, long size);
00057 /* Open a lineFile with decompression from a memory stream. */
00058 
00059 struct lineFile *lineFileMayOpen(char *fileName, bool zTerm);
00060 /* Try and open up a lineFile. If fileName ends in .gz, .Z, or .bz2,
00061  * it will be read from a decompress pipeline. */
00062 
00063 struct lineFile *lineFileOpen(char *fileName, bool zTerm);
00064 /* Open up a lineFile or die trying.  If fileName ends in .gz, .Z, or .bz2,
00065  * it will be read from a decompress pipeline. */
00066 
00067 struct lineFile *lineFileAttach(char *fileName, bool zTerm, int fd);
00068 /* Wrap a line file around an open'd file. */
00069 
00070 struct lineFile *lineFileStdin(bool zTerm);
00071 /* Wrap a line file around stdin. */
00072 
00073 struct lineFile *lineFileOnString(char *name, bool zTerm, char *s);
00074 /* Wrap a line file object around a string in memory.  This buffer
00075  * will have zeroes written into it if zTerm is non-zero.  It will
00076  * be freed when the line file is closed. */
00077 
00078 void lineFileClose(struct lineFile **pLf);
00079 /* Close up a line file. */
00080 
00081 void lineFileCloseList(struct lineFile **pList);
00082 /* Close up a list of line files. */
00083 
00084 boolean lineFileNext(struct lineFile *lf, char **retStart, int *retSize);
00085 /* Fetch next line from file. */
00086 
00087 boolean lineFileNextReal(struct lineFile *lf, char **retStart);
00088 /* Fetch next line from file that is not blank and 
00089  * does not start with a '#'. */
00090 
00091 void lineFileNeedNext(struct lineFile *lf, char **retStart, int *retSize);
00092 /* Fetch next line from file.  Squawk and die if it's not there. */
00093 
00094 void lineFileReuse(struct lineFile *lf);
00095 /* Reuse current line. */
00096 
00097 #define lineFileString(lf) ((lf)->buf + (lf)->lineStart)
00098 /* Current string in line file. */
00099 
00100 #define lineFileTell(lf) ((lf)->bufOffsetInFile + (lf)->lineStart)
00101 /* Current offset (of string start) in file. */
00102 
00103 void lineFileSeek(struct lineFile *lf, off_t offset, int whence);
00104 /* Seek to read next line from given position. */
00105 
00106 void lineFileAbort(struct lineFile *lf, char *format, ...)
00107 /* Print file name, line number, and error message, and abort. */
00108 #if defined(__GNUC__) && defined(JK_WARN)
00109 __attribute__((format(printf, 2, 3)))
00110 #endif
00111 ;
00112 
00113 void lineFileVaAbort(struct lineFile *lf, char *format, va_list args);
00114 /* Print file name, line number, and error message, and abort. */
00115  
00116 void lineFileUnexpectedEnd(struct lineFile *lf);
00117 /* Complain about unexpected end of file. */
00118 
00119 void lineFileExpectWords(struct lineFile *lf, int expecting, int got);
00120 /* Check line has right number of words. */
00121 
00122 void lineFileExpectAtLeast(struct lineFile *lf, int expecting, int got);
00123 /* Check line has at least the expected number of words. */
00124 
00125 void lineFileShort(struct lineFile *lf);
00126 /* Complain that line is too short. */
00127 
00128 boolean lineFileNextRow(struct lineFile *lf, char *words[], int wordCount);
00129 /* Return next non-blank line that doesn't start with '#' chopped into words.
00130  * Returns FALSE at EOF.  Aborts on error. */
00131 
00132 #define lineFileRow(lf, words) lineFileNextRow(lf, words, ArraySize(words))
00133 /* Read in line chopped into fixed size word array. */
00134 
00135 boolean lineFileNextCharRow(struct lineFile *lf, char sep, char *words[], int wordCount);
00136 /* Return next non-blank line that doesn't start with '#' chopped into words
00137  * delimited by sep. Returns FALSE at EOF.  Aborts on error. */
00138 
00139 boolean lineFileNextRowTab(struct lineFile *lf, char *words[], int wordCount);
00140 /* Return next non-blank line that doesn't start with '#' chopped into words
00141  * at tabs. Returns FALSE at EOF.  Aborts on error. */
00142 
00143 #define lineFileRowTab(lf, words) \
00144         lineFileNextRowTab(lf, words, ArraySize(words))
00145 /* Read in line chopped by tab into fixed size word array. */
00146 
00147 int lineFileChopNext(struct lineFile *lf, char *words[], int maxWords);
00148 /* Return next non-blank line that doesn't start with '#' chopped into words. */
00149 
00150 #define lineFileChop(lf, words) lineFileChopNext(lf, words, ArraySize(words))
00151 /* Ease-of-use macro for lineFileChopNext above. */
00152 
00153 int lineFileChopCharNext(struct lineFile *lf, char sep, char *words[], int maxWords);
00154 /* Return next non-blank line that doesn't start with '#' chopped into
00155    words delimited by sep. */
00156 
00157 int lineFileChopNextTab(struct lineFile *lf, char *words[], int maxWords);
00158 /* Return next non-blank line that doesn't start with '#' chopped into words
00159  * on tabs */
00160 
00161 #define lineFileChopTab(lf, words) lineFileChopNextTab(lf, words, ArraySize(words))
00162 /* Ease-of-use macro for lineFileChopNextTab above. */
00163 
00164 int lineFileNeedNum(struct lineFile *lf, char *words[], int wordIx);
00165 /* Make sure that words[wordIx] is an ascii integer, and return
00166  * binary representation of it. */
00167 
00168 int lineFileNeedFullNum(struct lineFile *lf, char *words[], int wordIx);
00169 /* Make sure that words[wordIx] is an ascii integer, and return
00170  * binary representation of it. Require all chars in word to be digits.*/
00171 
00172 double lineFileNeedDouble(struct lineFile *lf, char *words[], int wordIx);
00173 /* Make sure that words[wordIx] is an ascii double value, and return
00174  * binary representation of it. */
00175 
00176 void lineFileSkip(struct lineFile *lf, int lineCount);
00177 /* Skip a number of lines. */
00178 
00179 char *lineFileSkipToLineStartingWith(struct lineFile *lf, char *start, int maxCount);
00180 /* Skip to next line that starts with given string.  Return NULL
00181  * if no such line found, otherwise return the line. */
00182 
00183 boolean lineFileParseHttpHeader(struct lineFile *lf, char **hdr,
00184                                 boolean *chunked, int *contentLength);
00185 /* Extract HTTP response header from lf into hdr, tell if it's 
00186  * "Transfer-Encoding: chunked" or if it has a contentLength. */
00187 
00188 struct dyString *lineFileSlurpHttpBody(struct lineFile *lf,
00189                                        boolean chunked, int contentLength);
00190 /* Return a dyString that contains the http response body in lf.  Handle 
00191  * chunk-encoding and content-length. */
00192 
00193 void lineFileSetMetaDataOutput(struct lineFile *lf, FILE *f);
00194 /* Set file to write meta data to.
00195  * Should be called before reading from input file. */
00196 
00197 void lineFileSetUniqueMetaData(struct lineFile *lf);
00198 /* Suppress duplicate lines in metadata. */
00199 
00200 #endif /* LINEFILE_H */
00201 
00202 

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2